Update to Linux 2.6.15.

author cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>

Wed, 1 Feb 2006 18:00:19 +0000 (18:00 +0000)

committer cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>

Wed, 1 Feb 2006 18:00:19 +0000 (18:00 +0000)
author cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
Wed, 1 Feb 2006 18:00:19 +0000 (18:00 +0000)
committer cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
Wed, 1 Feb 2006 18:00:19 +0000 (18:00 +0000)
diff --git a/buildconfigs/linux-defconfig_xen0_x86_32 b/buildconfigs/linux-defconfig_xen0_x86_32

index a8efa792b87260919c4a3e91382122176e94c2b2..fa052b8ea9f91bed5f62a5f0be56b38d5dc24ea3 100644 (file)
--- a/buildconfigs/linux-defconfig_xen0_x86_32
+++ b/buildconfigs/linux-defconfig_xen0_x86_32
@@ -1,10 +1,11 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen0
-# Tue Jan 31 18:56:38 2006
+# Linux kernel version: 2.6.15-xen0
+# Wed Feb  1 15:54:13 2006
  #
-CONFIG_X86=y
+CONFIG_X86_32=y
  CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
  CONFIG_MMU=y
  CONFIG_UID16=y
  CONFIG_GENERIC_ISA_DMA=y
@@ -35,6 +36,7 @@ CONFIG_HOTPLUG=y
  CONFIG_KOBJECT_UEVENT=y
  # CONFIG_IKCONFIG is not set
  CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  # CONFIG_EMBEDDED is not set
  CONFIG_KALLSYMS=y
  # CONFIG_KALLSYMS_ALL is not set
@@ -63,6 +65,24 @@ CONFIG_OBSOLETE_MODPARM=y
  # CONFIG_MODULE_SRCVERSION_ALL is not set
  CONFIG_KMOD=y
  
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
  #
  # Processor type and features
  #
@@ -108,8 +128,10 @@ CONFIG_X86_WP_WORKS_OK=y
  CONFIG_X86_INVLPG=y
  CONFIG_X86_BSWAP=y
  CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
  CONFIG_X86_GOOD_APIC=y
  CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
  # CONFIG_SMP is not set
  CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
@@ -142,6 +164,7 @@ CONFIG_FLATMEM_MANUAL=y
  CONFIG_FLATMEM=y
  CONFIG_FLAT_NODE_MEM_MAP=y
  # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
  CONFIG_MTRR=y
  # CONFIG_REGPARM is not set
  CONFIG_SECCOMP=y
@@ -257,6 +280,10 @@ CONFIG_TCP_CONG_BIC=y
  CONFIG_NETFILTER=y
  # CONFIG_NETFILTER_DEBUG is not set
  CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
  # CONFIG_NETFILTER_NETLINK is not set
  
  #
@@ -346,8 +373,11 @@ CONFIG_BRIDGE=y
  # CONFIG_NET_DIVERT is not set
  # CONFIG_ECONET is not set
  # CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
  # CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
  
  #
  # Network testing
@@ -409,16 +439,7 @@ CONFIG_BLK_DEV_RAM=y
  CONFIG_BLK_DEV_RAM_COUNT=16
  CONFIG_BLK_DEV_RAM_SIZE=4096
  CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
  # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
  # CONFIG_ATA_OVER_ETH is not set
  
  #
@@ -464,6 +485,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
  # CONFIG_BLK_DEV_CY82C693 is not set
  # CONFIG_BLK_DEV_CS5520 is not set
  # CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_CS5535 is not set
  # CONFIG_BLK_DEV_HPT34X is not set
  # CONFIG_BLK_DEV_HPT366 is not set
  # CONFIG_BLK_DEV_SC1200 is not set
@@ -519,6 +541,7 @@ CONFIG_SCSI_SPI_ATTRS=y
  #
  # SCSI low-level drivers
  #
+# CONFIG_ISCSI_TCP is not set
  CONFIG_BLK_DEV_3W_XXXX_RAID=y
  # CONFIG_SCSI_3W_9XXX is not set
  # CONFIG_SCSI_ACARD is not set
@@ -548,16 +571,17 @@ CONFIG_SCSI_SATA=y
  CONFIG_SCSI_ATA_PIIX=y
  # CONFIG_SCSI_SATA_MV is not set
  # CONFIG_SCSI_SATA_NV is not set
-CONFIG_SCSI_SATA_PROMISE=y
+# CONFIG_SCSI_PDC_ADMA is not set
  # CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
  CONFIG_SCSI_SATA_SX4=y
  CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
  # CONFIG_SCSI_SATA_SIS is not set
  # CONFIG_SCSI_SATA_ULI is not set
  # CONFIG_SCSI_SATA_VIA is not set
  # CONFIG_SCSI_SATA_VITESSE is not set
  CONFIG_SCSI_SATA_INTEL_COMBINED=y
-# CONFIG_SCSI_CPQFCTS is not set
  # CONFIG_SCSI_DMX3191D is not set
  # CONFIG_SCSI_EATA_PIO is not set
  # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -566,7 +590,6 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y
  # CONFIG_SCSI_INIA100 is not set
  # CONFIG_SCSI_SYM53C8XX_2 is not set
  # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
  # CONFIG_SCSI_QLOGIC_FC is not set
  # CONFIG_SCSI_QLOGIC_1280 is not set
  CONFIG_SCSI_QLA2XXX=y
@@ -807,7 +830,6 @@ CONFIG_HW_CONSOLE=y
  #
  # Serial drivers
  #
-# CONFIG_SERIAL_8250 is not set
  
  #
  # Non-8250 serial port support
@@ -870,6 +892,7 @@ CONFIG_DRM_SIS=m
  # TPM devices
  #
  # CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
  
  #
  # I2C support
@@ -954,12 +977,15 @@ CONFIG_USB_UHCI_HCD=y
  #
  # USB Device Class drivers
  #
-# CONFIG_USB_BLUETOOTH_TTY is not set
  # CONFIG_USB_ACM is not set
  # CONFIG_USB_PRINTER is not set
  
  #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
  #
  # CONFIG_USB_STORAGE is not set
  
@@ -1211,6 +1237,11 @@ CONFIG_NLS_ISO8859_1=y
  # CONFIG_NLS_KOI8_U is not set
  # CONFIG_NLS_UTF8 is not set
  
+#
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
  #
  # Kernel hacking
  #
@@ -1228,10 +1259,11 @@ CONFIG_DETECT_SOFTLOCKUP=y
  CONFIG_DEBUG_BUGVERBOSE=y
  # CONFIG_DEBUG_INFO is not set
  # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
  CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
  CONFIG_EARLY_PRINTK=y
  # CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
  # CONFIG_DEBUG_STACK_USAGE is not set
  # CONFIG_DEBUG_PAGEALLOC is not set
  # CONFIG_4KSTACKS is not set
@@ -1312,4 +1344,3 @@ CONFIG_ZLIB_INFLATE=y
  CONFIG_GENERIC_HARDIRQS=y
  CONFIG_GENERIC_IRQ_PROBE=y
  CONFIG_X86_BIOS_REBOOT=y
-CONFIG_PC=y
diff --git a/buildconfigs/linux-defconfig_xen0_x86_64 b/buildconfigs/linux-defconfig_xen0_x86_64

index e140ede4ade077e4d26c5b6c173f3360033a92ed..460393db98321cb1a05aedafc60c31ebfd14d085 100644 (file)
--- a/buildconfigs/linux-defconfig_xen0_x86_64
+++ b/buildconfigs/linux-defconfig_xen0_x86_64
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen0
-# Tue Jan 31 16:21:00 2006
+# Linux kernel version: 2.6.15-xen0
+# Wed Feb  1 15:50:08 2006
  #
  CONFIG_X86_64=y
  CONFIG_64BIT=y
@@ -40,6 +40,7 @@ CONFIG_HOTPLUG=y
  CONFIG_KOBJECT_UEVENT=y
  # CONFIG_IKCONFIG is not set
  CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  # CONFIG_EMBEDDED is not set
  CONFIG_KALLSYMS=y
  # CONFIG_KALLSYMS_ALL is not set
@@ -68,6 +69,24 @@ CONFIG_OBSOLETE_MODPARM=y
  # CONFIG_MODULE_SRCVERSION_ALL is not set
  CONFIG_KMOD=y
  
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
  #
  # Processor type and features
  #
@@ -88,7 +107,6 @@ CONFIG_X86_LOCAL_APIC=y
  CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
-# CONFIG_NUMA is not set
  CONFIG_ARCH_FLATMEM_ENABLE=y
  CONFIG_SELECT_MEMORY_MODEL=y
  CONFIG_FLATMEM_MANUAL=y
@@ -97,6 +115,7 @@ CONFIG_FLATMEM_MANUAL=y
  CONFIG_FLATMEM=y
  CONFIG_FLAT_NODE_MEM_MAP=y
  # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
  CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
  CONFIG_SWIOTLB=y
  CONFIG_DUMMY_IOMMU=y
@@ -196,6 +215,10 @@ CONFIG_TCP_CONG_BIC=y
  CONFIG_NETFILTER=y
  # CONFIG_NETFILTER_DEBUG is not set
  CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
  # CONFIG_NETFILTER_NETLINK is not set
  
  #
@@ -285,8 +308,11 @@ CONFIG_BRIDGE=y
  # CONFIG_NET_DIVERT is not set
  # CONFIG_ECONET is not set
  # CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
  # CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
  
  #
  # Network testing
@@ -348,16 +374,7 @@ CONFIG_BLK_DEV_RAM=y
  CONFIG_BLK_DEV_RAM_COUNT=16
  CONFIG_BLK_DEV_RAM_SIZE=16384
  CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
  # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
  # CONFIG_ATA_OVER_ETH is not set
  
  #
@@ -458,6 +475,7 @@ CONFIG_SCSI_SPI_ATTRS=y
  #
  # SCSI low-level drivers
  #
+# CONFIG_ISCSI_TCP is not set
  CONFIG_BLK_DEV_3W_XXXX_RAID=y
  # CONFIG_SCSI_3W_9XXX is not set
  # CONFIG_SCSI_ACARD is not set
@@ -488,10 +506,12 @@ CONFIG_SCSI_SATA=y
  CONFIG_SCSI_ATA_PIIX=y
  # CONFIG_SCSI_SATA_MV is not set
  # CONFIG_SCSI_SATA_NV is not set
-CONFIG_SCSI_SATA_PROMISE=y
+# CONFIG_SCSI_PDC_ADMA is not set
  # CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
  CONFIG_SCSI_SATA_SX4=y
  CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
  # CONFIG_SCSI_SATA_SIS is not set
  # CONFIG_SCSI_SATA_ULI is not set
  # CONFIG_SCSI_SATA_VIA is not set
@@ -499,7 +519,6 @@ CONFIG_SCSI_SATA_SIL=y
  CONFIG_SCSI_SATA_INTEL_COMBINED=y
  CONFIG_SCSI_BUSLOGIC=y
  # CONFIG_SCSI_OMIT_FLASHPOINT is not set
-# CONFIG_SCSI_CPQFCTS is not set
  # CONFIG_SCSI_DMX3191D is not set
  # CONFIG_SCSI_EATA is not set
  # CONFIG_SCSI_EATA_PIO is not set
@@ -510,7 +529,6 @@ CONFIG_SCSI_BUSLOGIC=y
  # CONFIG_SCSI_INIA100 is not set
  # CONFIG_SCSI_SYM53C8XX_2 is not set
  # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
  # CONFIG_SCSI_QLOGIC_FC is not set
  # CONFIG_SCSI_QLOGIC_1280 is not set
  CONFIG_SCSI_QLA2XXX=y
@@ -750,7 +768,6 @@ CONFIG_HW_CONSOLE=y
  #
  # Serial drivers
  #
-# CONFIG_SERIAL_8250 is not set
  
  #
  # Non-8250 serial port support
@@ -800,6 +817,7 @@ CONFIG_DRM_SIS=m
  # TPM devices
  #
  # CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
  
  #
  # I2C support
@@ -884,12 +902,15 @@ CONFIG_USB_UHCI_HCD=y
  #
  # USB Device Class drivers
  #
-# CONFIG_USB_BLUETOOTH_TTY is not set
  # CONFIG_USB_ACM is not set
  # CONFIG_USB_PRINTER is not set
  
  #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
  #
  # CONFIG_USB_STORAGE is not set
  
@@ -989,6 +1010,7 @@ CONFIG_INFINIBAND_MTHCA_DEBUG=y
  CONFIG_INFINIBAND_IPOIB=y
  CONFIG_INFINIBAND_IPOIB_DEBUG=y
  CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
+CONFIG_INFINIBAND_SRP=y
  
  #
  # SN Devices
@@ -1155,9 +1177,10 @@ CONFIG_NLS_ISO8859_1=y
  # CONFIG_NLS_UTF8 is not set
  
  #
-# Profiling support
+# Instrumentation Support
  #
  # CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
  
  #
  # Kernel hacking
@@ -1173,10 +1196,10 @@ CONFIG_DETECT_SOFTLOCKUP=y
  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
  # CONFIG_DEBUG_KOBJECT is not set
  # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
  CONFIG_FRAME_POINTER=y
-# CONFIG_CHECKING is not set
+# CONFIG_RCU_TORTURE_TEST is not set
  # CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
  
  #
  # Security options
diff --git a/buildconfigs/linux-defconfig_xenU_x86_32 b/buildconfigs/linux-defconfig_xenU_x86_32

index b59f48dc1365da5c8d35d9bc856ef7d9511c173c..23cad4e3056907dfd9164a06b49c70b26666076a 100644 (file)
--- a/buildconfigs/linux-defconfig_xenU_x86_32
+++ b/buildconfigs/linux-defconfig_xenU_x86_32
@@ -1,10 +1,11 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xenU
-# Tue Jan 31 18:57:16 2006
+# Linux kernel version: 2.6.15-xenU
+# Wed Feb  1 17:28:35 2006
  #
-CONFIG_X86=y
+CONFIG_X86_32=y
  CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
  CONFIG_MMU=y
  CONFIG_UID16=y
  CONFIG_GENERIC_ISA_DMA=y
@@ -35,6 +36,7 @@ CONFIG_KOBJECT_UEVENT=y
  # CONFIG_IKCONFIG is not set
  # CONFIG_CPUSETS is not set
  CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  # CONFIG_EMBEDDED is not set
  CONFIG_KALLSYMS=y
  # CONFIG_KALLSYMS_ALL is not set
@@ -64,6 +66,24 @@ CONFIG_OBSOLETE_MODPARM=y
  CONFIG_KMOD=y
  CONFIG_STOP_MACHINE=y
  
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
  #
  # Processor type and features
  #
@@ -109,8 +129,10 @@ CONFIG_X86_WP_WORKS_OK=y
  CONFIG_X86_INVLPG=y
  CONFIG_X86_BSWAP=y
  CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
  CONFIG_X86_GOOD_APIC=y
  CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
  CONFIG_SMP=y
  CONFIG_SMP_ALTERNATIVES=y
  CONFIG_NR_CPUS=8
@@ -141,6 +163,7 @@ CONFIG_FLATMEM_MANUAL=y
  CONFIG_FLATMEM=y
  CONFIG_FLAT_NODE_MEM_MAP=y
  # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
  # CONFIG_REGPARM is not set
  CONFIG_SECCOMP=y
  # CONFIG_HZ_100 is not set
@@ -212,8 +235,11 @@ CONFIG_TCP_CONG_BIC=y
  # CONFIG_NET_DIVERT is not set
  # CONFIG_ECONET is not set
  # CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
  # CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
  
  #
  # Network testing
@@ -248,16 +274,7 @@ CONFIG_BLK_DEV_RAM=y
  CONFIG_BLK_DEV_RAM_COUNT=16
  CONFIG_BLK_DEV_RAM_SIZE=4096
  CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
  # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
  # CONFIG_ATA_OVER_ETH is not set
  
  #
@@ -295,6 +312,7 @@ CONFIG_BLK_DEV_SD=m
  #
  # SCSI low-level drivers
  #
+# CONFIG_ISCSI_TCP is not set
  # CONFIG_SCSI_SATA is not set
  # CONFIG_SCSI_DEBUG is not set
  
@@ -502,6 +520,11 @@ CONFIG_NLS_ISO8859_1=y
  # CONFIG_NLS_KOI8_U is not set
  # CONFIG_NLS_UTF8 is not set
  
+#
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
  #
  # Kernel hacking
  #
@@ -519,10 +542,11 @@ CONFIG_DETECT_SOFTLOCKUP=y
  CONFIG_DEBUG_BUGVERBOSE=y
  # CONFIG_DEBUG_INFO is not set
  # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
  CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
  CONFIG_EARLY_PRINTK=y
  # CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
  # CONFIG_DEBUG_STACK_USAGE is not set
  # CONFIG_DEBUG_PAGEALLOC is not set
  # CONFIG_4KSTACKS is not set
@@ -598,4 +622,3 @@ CONFIG_GENERIC_PENDING_IRQ=y
  CONFIG_X86_SMP=y
  CONFIG_X86_BIOS_REBOOT=y
  CONFIG_X86_TRAMPOLINE=y
-CONFIG_PC=y
diff --git a/buildconfigs/linux-defconfig_xenU_x86_64 b/buildconfigs/linux-defconfig_xenU_x86_64

index 32244b357cffb8f1f7683c4cf5a36804ebfe2ef4..c3a8be6b164e7b46a0da1ae95402ba8938514e2f 100644 (file)
--- a/buildconfigs/linux-defconfig_xenU_x86_64
+++ b/buildconfigs/linux-defconfig_xenU_x86_64
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xenU
-# Tue Jan 31 19:51:18 2006
+# Linux kernel version: 2.6.15-xenU
+# Wed Feb  1 15:49:27 2006
  #
  CONFIG_X86_64=y
  CONFIG_64BIT=y
@@ -42,6 +42,7 @@ CONFIG_KOBJECT_UEVENT=y
  # CONFIG_IKCONFIG is not set
  # CONFIG_CPUSETS is not set
  CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  # CONFIG_EMBEDDED is not set
  CONFIG_KALLSYMS=y
  # CONFIG_KALLSYMS_ALL is not set
@@ -71,6 +72,24 @@ CONFIG_MODVERSIONS=y
  CONFIG_KMOD=y
  CONFIG_STOP_MACHINE=y
  
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
  #
  # Processor type and features
  #
@@ -90,7 +109,6 @@ CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
  CONFIG_PREEMPT_BKL=y
-# CONFIG_NUMA is not set
  CONFIG_ARCH_FLATMEM_ENABLE=y
  CONFIG_SELECT_MEMORY_MODEL=y
  CONFIG_FLATMEM_MANUAL=y
@@ -99,6 +117,7 @@ CONFIG_FLATMEM_MANUAL=y
  CONFIG_FLATMEM=y
  CONFIG_FLAT_NODE_MEM_MAP=y
  # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
  CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
  CONFIG_NR_CPUS=8
  # CONFIG_HOTPLUG_CPU is not set
@@ -219,6 +238,10 @@ CONFIG_IPV6_TUNNEL=m
  CONFIG_NETFILTER=y
  # CONFIG_NETFILTER_DEBUG is not set
  CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
  # CONFIG_NETFILTER_NETLINK is not set
  
  #
@@ -384,10 +407,18 @@ CONFIG_IPDDP_DECAP=y
  CONFIG_NET_DIVERT=y
  # CONFIG_ECONET is not set
  CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
  CONFIG_NET_SCHED=y
  CONFIG_NET_SCH_CLK_JIFFIES=y
  # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
  # CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
  CONFIG_NET_SCH_CBQ=m
  CONFIG_NET_SCH_HTB=m
  CONFIG_NET_SCH_HFSC=m
@@ -401,8 +432,10 @@ CONFIG_NET_SCH_GRED=m
  CONFIG_NET_SCH_DSMARK=m
  CONFIG_NET_SCH_NETEM=m
  CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
  CONFIG_NET_CLS=y
  # CONFIG_NET_CLS_BASIC is not set
  CONFIG_NET_CLS_TCINDEX=m
@@ -411,13 +444,14 @@ CONFIG_NET_CLS_ROUTE=y
  CONFIG_NET_CLS_FW=m
  CONFIG_NET_CLS_U32=m
  CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_IND=y
  # CONFIG_CLS_U32_MARK is not set
  CONFIG_NET_CLS_RSVP=m
  CONFIG_NET_CLS_RSVP6=m
  # CONFIG_NET_EMATCH is not set
  # CONFIG_NET_CLS_ACT is not set
  CONFIG_NET_CLS_POLICE=y
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_ESTIMATOR=y
  
  #
  # Network testing
@@ -496,7 +530,6 @@ CONFIG_BT_HIDP=m
  CONFIG_BT_HCIUART=m
  CONFIG_BT_HCIUART_H4=y
  CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_BCSP_TXCRC=y
  CONFIG_BT_HCIVHCI=m
  # CONFIG_IEEE80211 is not set
  
@@ -524,16 +557,7 @@ CONFIG_BLK_DEV_RAM=y
  CONFIG_BLK_DEV_RAM_COUNT=16
  CONFIG_BLK_DEV_RAM_SIZE=16384
  CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
  # CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
  # CONFIG_ATA_OVER_ETH is not set
  
  #
@@ -572,6 +596,7 @@ CONFIG_SCSI_FC_ATTRS=m
  #
  # SCSI low-level drivers
  #
+# CONFIG_ISCSI_TCP is not set
  CONFIG_SCSI_SATA=m
  # CONFIG_SCSI_DEBUG is not set
  
@@ -647,6 +672,7 @@ CONFIG_ATMEL=m
  #
  # ATM drivers
  #
+# CONFIG_ATM_DUMMY is not set
  CONFIG_ATM_TCP=m
  CONFIG_PPP=m
  CONFIG_PPP_MULTILINK=y
@@ -655,6 +681,7 @@ CONFIG_PPP_ASYNC=m
  CONFIG_PPP_SYNC_TTY=m
  CONFIG_PPP_DEFLATE=m
  # CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
  CONFIG_PPPOE=m
  CONFIG_PPPOATM=m
  # CONFIG_SLIP is not set
@@ -705,7 +732,7 @@ CONFIG_JFS_POSIX_ACL=y
  CONFIG_FS_POSIX_ACL=y
  CONFIG_XFS_FS=m
  CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
  CONFIG_XFS_SECURITY=y
  CONFIG_XFS_POSIX_ACL=y
  # CONFIG_XFS_RT is not set
@@ -877,9 +904,10 @@ CONFIG_NLS_KOI8_U=m
  CONFIG_NLS_UTF8=m
  
  #
-# Profiling support
+# Instrumentation Support
  #
  # CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
  
  #
  # Kernel hacking
@@ -895,9 +923,10 @@ CONFIG_DETECT_SOFTLOCKUP=y
  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
  # CONFIG_DEBUG_KOBJECT is not set
  # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
  CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
  # CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
  
  #
  # Security options
diff --git a/buildconfigs/linux-defconfig_xen_x86_32 b/buildconfigs/linux-defconfig_xen_x86_32

index 68c7ff0cf500c9cb2cca04f2b326c57602e868ba..162eeaa4ab64364046cb25d179e86275dfb06547 100644 (file)
--- a/buildconfigs/linux-defconfig_xen_x86_32
+++ b/buildconfigs/linux-defconfig_xen_x86_32
@@ -1,10 +1,11 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen
-# Tue Jan 31 19:01:58 2006
+# Linux kernel version: 2.6.15-xen
+# Wed Feb  1 17:28:24 2006
  #
-CONFIG_X86=y
+CONFIG_X86_32=y
  CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
  CONFIG_MMU=y
  CONFIG_UID16=y
  CONFIG_GENERIC_ISA_DMA=y
@@ -38,6 +39,7 @@ CONFIG_KOBJECT_UEVENT=y
  # CONFIG_IKCONFIG is not set
  # CONFIG_CPUSETS is not set
  CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  CONFIG_EMBEDDED=y
  CONFIG_KALLSYMS=y
  # CONFIG_KALLSYMS_ALL is not set
@@ -47,7 +49,6 @@ CONFIG_BUG=y
  CONFIG_BASE_FULL=y
  CONFIG_FUTEX=y
  CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  CONFIG_SHMEM=y
  CONFIG_CC_ALIGN_FUNCTIONS=0
  CONFIG_CC_ALIGN_LABELS=0
@@ -68,6 +69,24 @@ CONFIG_MODVERSIONS=y
  CONFIG_KMOD=y
  CONFIG_STOP_MACHINE=y
  
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
  #
  # Processor type and features
  #
@@ -113,8 +132,10 @@ CONFIG_X86_WP_WORKS_OK=y
  CONFIG_X86_INVLPG=y
  CONFIG_X86_BSWAP=y
  CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
  CONFIG_X86_GOOD_APIC=y
  CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
  CONFIG_SMP=y
  CONFIG_SMP_ALTERNATIVES=y
  CONFIG_NR_CPUS=8
@@ -148,6 +169,7 @@ CONFIG_FLATMEM_MANUAL=y
  CONFIG_FLATMEM=y
  CONFIG_FLAT_NODE_MEM_MAP=y
  # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
  CONFIG_MTRR=y
  # CONFIG_REGPARM is not set
  CONFIG_SECCOMP=y
@@ -156,8 +178,8 @@ CONFIG_HZ_250=y
  # CONFIG_HZ_1000 is not set
  CONFIG_HZ=250
  CONFIG_PHYSICAL_START=0x100000
-CONFIG_HOTPLUG_CPU=y
  # CONFIG_CRASH_DUMP is not set
+CONFIG_HOTPLUG_CPU=y
  
  #
  # Power management options (ACPI, APM)
@@ -175,6 +197,7 @@ CONFIG_ACPI_VIDEO=m
  CONFIG_ACPI_HOTKEY=m
  CONFIG_ACPI_FAN=m
  CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_HOTPLUG_CPU=y
  CONFIG_ACPI_THERMAL=m
  CONFIG_ACPI_ASUS=m
  CONFIG_ACPI_IBM=m
@@ -185,7 +208,7 @@ CONFIG_ACPI_EC=y
  CONFIG_ACPI_POWER=y
  CONFIG_ACPI_SYSTEM=y
  # CONFIG_X86_PM_TIMER is not set
-# CONFIG_ACPI_CONTAINER is not set
+CONFIG_ACPI_CONTAINER=m
  
  #
  # CPU Frequency scaling
@@ -206,7 +229,6 @@ CONFIG_PCI_MMCONFIG=y
  # CONFIG_PCI_LEGACY_PROC is not set
  # CONFIG_PCI_DEBUG is not set
  CONFIG_SCx200=m
-# CONFIG_HOTPLUG_CPU is not set
  
  #
  # PCCARD (PCMCIA/CardBus) support
@@ -341,6 +363,10 @@ CONFIG_IPV6_TUNNEL=m
  CONFIG_NETFILTER=y
  # CONFIG_NETFILTER_DEBUG is not set
  CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
  CONFIG_NETFILTER_NETLINK=m
  CONFIG_NETFILTER_NETLINK_QUEUE=m
  CONFIG_NETFILTER_NETLINK_LOG=m
@@ -534,10 +560,18 @@ CONFIG_ECONET=m
  CONFIG_ECONET_AUNUDP=y
  CONFIG_ECONET_NATIVE=y
  CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
  CONFIG_NET_SCHED=y
  CONFIG_NET_SCH_CLK_JIFFIES=y
  # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
  # CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
  CONFIG_NET_SCH_CBQ=m
  CONFIG_NET_SCH_HTB=m
  CONFIG_NET_SCH_HFSC=m
@@ -551,8 +585,10 @@ CONFIG_NET_SCH_GRED=m
  CONFIG_NET_SCH_DSMARK=m
  CONFIG_NET_SCH_NETEM=m
  CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
  CONFIG_NET_CLS=y
  CONFIG_NET_CLS_BASIC=m
  CONFIG_NET_CLS_TCINDEX=m
@@ -561,7 +597,6 @@ CONFIG_NET_CLS_ROUTE=y
  CONFIG_NET_CLS_FW=m
  CONFIG_NET_CLS_U32=m
  # CONFIG_CLS_U32_PERF is not set
-# CONFIG_NET_CLS_IND is not set
  # CONFIG_CLS_U32_MARK is not set
  CONFIG_NET_CLS_RSVP=m
  CONFIG_NET_CLS_RSVP6=m
@@ -574,6 +609,8 @@ CONFIG_NET_EMATCH_META=m
  CONFIG_NET_EMATCH_TEXT=m
  # CONFIG_NET_CLS_ACT is not set
  CONFIG_NET_CLS_POLICE=y
+# CONFIG_NET_CLS_IND is not set
+CONFIG_NET_ESTIMATOR=y
  
  #
  # Network testing
@@ -676,7 +713,6 @@ CONFIG_BT_HCIUSB_SCO=y
  CONFIG_BT_HCIUART=m
  CONFIG_BT_HCIUART_H4=y
  CONFIG_BT_HCIUART_BCSP=y
-# CONFIG_BT_HCIUART_BCSP_TXCRC is not set
  CONFIG_BT_HCIBCM203X=m
  # CONFIG_BT_HCIBPA10X is not set
  CONFIG_BT_HCIBFUSB=m
@@ -731,6 +767,7 @@ CONFIG_FTL=m
  CONFIG_NFTL=m
  CONFIG_NFTL_RW=y
  CONFIG_INFTL=m
+CONFIG_RFD_FTL=m
  
  #
  # RAM/ROM/Flash chip drivers
@@ -822,6 +859,12 @@ CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0
  # CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE is not set
  # CONFIG_MTD_NAND_NANDSIM is not set
  
+#
+# OneNAND Flash Device Drivers
+#
+CONFIG_MTD_ONENAND=m
+# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set
+
  #
  # Parallel port support
  #
@@ -896,18 +939,9 @@ CONFIG_BLK_DEV_RAM=y
  CONFIG_BLK_DEV_RAM_COUNT=16
  CONFIG_BLK_DEV_RAM_SIZE=16384
  CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
  CONFIG_CDROM_PKTCDVD=m
  CONFIG_CDROM_PKTCDVD_BUFFERS=8
  # CONFIG_CDROM_PKTCDVD_WCACHE is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
  CONFIG_ATA_OVER_ETH=m
  
  #
@@ -957,6 +991,7 @@ CONFIG_BLK_DEV_TRIFLEX=y
  CONFIG_BLK_DEV_CY82C693=y
  CONFIG_BLK_DEV_CS5520=y
  CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_CS5535=m
  CONFIG_BLK_DEV_HPT34X=y
  # CONFIG_HPT34X_AUTODMA is not set
  CONFIG_BLK_DEV_HPT366=y
@@ -1010,12 +1045,13 @@ CONFIG_SCSI_LOGGING=y
  #
  CONFIG_SCSI_SPI_ATTRS=m
  CONFIG_SCSI_FC_ATTRS=m
-# CONFIG_SCSI_ISCSI_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=m
  CONFIG_SCSI_SAS_ATTRS=m
  
  #
  # SCSI low-level drivers
  #
+CONFIG_ISCSI_TCP=m
  CONFIG_BLK_DEV_3W_XXXX_RAID=m
  CONFIG_SCSI_3W_9XXX=m
  CONFIG_SCSI_ACARD=m
@@ -1046,16 +1082,17 @@ CONFIG_SCSI_SATA_SVW=m
  CONFIG_SCSI_ATA_PIIX=m
  CONFIG_SCSI_SATA_MV=m
  CONFIG_SCSI_SATA_NV=m
-CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_PDC_ADMA=m
  # CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=m
  CONFIG_SCSI_SATA_SX4=m
  CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIL24=m
  CONFIG_SCSI_SATA_SIS=m
  CONFIG_SCSI_SATA_ULI=m
  CONFIG_SCSI_SATA_VIA=m
  CONFIG_SCSI_SATA_VITESSE=m
  CONFIG_SCSI_SATA_INTEL_COMBINED=y
-# CONFIG_SCSI_CPQFCTS is not set
  CONFIG_SCSI_DMX3191D=m
  CONFIG_SCSI_EATA_PIO=m
  CONFIG_SCSI_FUTURE_DOMAIN=m
@@ -1074,11 +1111,9 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
  CONFIG_SCSI_IPR=m
  # CONFIG_SCSI_IPR_TRACE is not set
  # CONFIG_SCSI_IPR_DUMP is not set
-CONFIG_SCSI_QLOGIC_ISP=m
  CONFIG_SCSI_QLOGIC_FC=m
  CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
  CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_QLOGIC_1280_1040=y
  CONFIG_SCSI_QLA2XXX=m
  CONFIG_SCSI_QLA21XX=m
  CONFIG_SCSI_QLA22XX=m
@@ -1206,7 +1241,6 @@ CONFIG_ARCNET_COM20020_PCI=m
  # PHY device support
  #
  CONFIG_PHYLIB=m
-CONFIG_PHYCONTROL=y
  
  #
  # MII PHY device drivers
@@ -1302,7 +1336,6 @@ CONFIG_IXGB=m
  # CONFIG_IXGB_NAPI is not set
  CONFIG_S2IO=m
  # CONFIG_S2IO_NAPI is not set
-# CONFIG_2BUFF_MODE is not set
  
  #
  # Token Ring devices
@@ -1418,6 +1451,7 @@ CONFIG_SBNI=m
  #
  # ATM drivers
  #
+CONFIG_ATM_DUMMY=m
  CONFIG_ATM_TCP=m
  CONFIG_ATM_LANAI=m
  CONFIG_ATM_ENI=m
@@ -1462,6 +1496,7 @@ CONFIG_PPP_ASYNC=m
  CONFIG_PPP_SYNC_TTY=m
  CONFIG_PPP_DEFLATE=m
  CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_MPPE=m
  CONFIG_PPPOE=m
  CONFIG_PPPOATM=m
  CONFIG_SLIP=m
@@ -1674,6 +1709,7 @@ CONFIG_TOUCHSCREEN_MTOUCH=m
  CONFIG_TOUCHSCREEN_MK712=m
  CONFIG_INPUT_MISC=y
  CONFIG_INPUT_PCSPKR=m
+CONFIG_INPUT_WISTRON_BTNS=m
  CONFIG_INPUT_UINPUT=m
  
  #
@@ -1816,17 +1852,20 @@ CONFIG_DRM_SAVAGE=m
  # PCMCIA character devices
  #
  CONFIG_SYNCLINK_CS=m
+CONFIG_CARDMAN_4000=m
+CONFIG_CARDMAN_4040=m
  CONFIG_MWAVE=m
  CONFIG_SCx200_GPIO=m
  CONFIG_RAW_DRIVER=m
-# CONFIG_HPET is not set
  CONFIG_MAX_RAW_DEVS=256
+# CONFIG_HPET is not set
  CONFIG_HANGCHECK_TIMER=m
  
  #
  # TPM devices
  #
  # CONFIG_TCG_TPM is not set
+CONFIG_TELCLOCK=m
  
  #
  # I2C support
@@ -1883,6 +1922,7 @@ CONFIG_SENSORS_PCA9539=m
  CONFIG_SENSORS_PCF8591=m
  CONFIG_SENSORS_RTC8564=m
  CONFIG_SENSORS_MAX6875=m
+CONFIG_RTC_X1205_I2C=m
  # CONFIG_I2C_DEBUG_CORE is not set
  # CONFIG_I2C_DEBUG_ALGO is not set
  # CONFIG_I2C_DEBUG_BUS is not set
@@ -1964,6 +2004,7 @@ CONFIG_VIDEO_DEV=m
  # Video Adapters
  #
  CONFIG_VIDEO_BT848=m
+# CONFIG_VIDEO_BT848_DVB is not set
  CONFIG_VIDEO_SAA6588=m
  CONFIG_VIDEO_BWQCAM=m
  CONFIG_VIDEO_CQCAM=m
@@ -1990,7 +2031,10 @@ CONFIG_VIDEO_HEXIUM_ORION=m
  CONFIG_VIDEO_HEXIUM_GEMINI=m
  CONFIG_VIDEO_CX88=m
  # CONFIG_VIDEO_CX88_DVB is not set
+CONFIG_VIDEO_EM28XX=m
  CONFIG_VIDEO_OVCAMCHIP=m
+CONFIG_VIDEO_AUDIO_DECODER=m
+CONFIG_VIDEO_DECODER=m
  
  #
  # Radio Adapters
@@ -2098,6 +2142,7 @@ CONFIG_DVB_STV0297=m
  # ATSC (North American/Korean Terresterial DTV) frontends
  #
  CONFIG_DVB_NXT2002=m
+CONFIG_DVB_NXT200X=m
  CONFIG_DVB_OR51211=m
  CONFIG_DVB_OR51132=m
  CONFIG_DVB_BCM3510=m
@@ -2118,7 +2163,6 @@ CONFIG_FB=y
  CONFIG_FB_CFB_FILLRECT=m
  CONFIG_FB_CFB_COPYAREA=m
  CONFIG_FB_CFB_IMAGEBLIT=m
-CONFIG_FB_SOFT_CURSOR=m
  # CONFIG_FB_MACMODES is not set
  CONFIG_FB_MODE_HELPERS=y
  CONFIG_FB_TILEBLITTING=y
@@ -2134,6 +2178,7 @@ CONFIG_FB_VGA16=m
  CONFIG_VIDEO_SELECT=y
  CONFIG_FB_HGA=m
  # CONFIG_FB_HGA_ACCEL is not set
+CONFIG_FB_S1D13XXX=m
  CONFIG_FB_NVIDIA=m
  CONFIG_FB_NVIDIA_I2C=y
  CONFIG_FB_RIVA=m
@@ -2176,7 +2221,6 @@ CONFIG_FB_TRIDENT=m
  # CONFIG_FB_PM3 is not set
  CONFIG_FB_GEODE=y
  CONFIG_FB_GEODE_GX1=m
-CONFIG_FB_S1D13XXX=m
  CONFIG_FB_VIRTUAL=m
  
  #
@@ -2185,6 +2229,7 @@ CONFIG_FB_VIRTUAL=m
  CONFIG_VGA_CONSOLE=y
  CONFIG_DUMMY_CONSOLE=y
  CONFIG_FRAMEBUFFER_CONSOLE=m
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
  # CONFIG_FONTS is not set
  CONFIG_FONT_8x8=y
  CONFIG_FONT_8x16=y
@@ -2204,6 +2249,8 @@ CONFIG_SOUND=m
  # Advanced Linux Sound Architecture
  #
  CONFIG_SND=m
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_AC97_BUS=m
  CONFIG_SND_TIMER=m
  CONFIG_SND_PCM=m
  CONFIG_SND_HWDEP=m
@@ -2231,8 +2278,6 @@ CONFIG_SND_VIRMIDI=m
  CONFIG_SND_MTPAV=m
  CONFIG_SND_SERIAL_U16550=m
  CONFIG_SND_MPU401=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
  
  #
  # PCI devices
@@ -2295,30 +2340,13 @@ CONFIG_SND_USB_USX2Y=m
  # Open Sound System
  #
  CONFIG_SOUND_PRIME=m
-CONFIG_SOUND_BT878=m
-CONFIG_SOUND_CMPCI=m
-# CONFIG_SOUND_CMPCI_FM is not set
-# CONFIG_SOUND_CMPCI_MIDI is not set
-CONFIG_SOUND_CMPCI_JOYSTICK=y
-CONFIG_SOUND_EMU10K1=m
+# CONFIG_OBSOLETE_OSS_DRIVER is not set
  CONFIG_SOUND_FUSION=m
-CONFIG_SOUND_CS4281=m
-CONFIG_SOUND_ES1370=m
-CONFIG_SOUND_ES1371=m
-CONFIG_SOUND_ESSSOLO1=m
-CONFIG_SOUND_MAESTRO=m
-CONFIG_SOUND_MAESTRO3=m
  CONFIG_SOUND_ICH=m
-CONFIG_SOUND_SONICVIBES=m
  CONFIG_SOUND_TRIDENT=m
  # CONFIG_SOUND_MSNDCLAS is not set
  # CONFIG_SOUND_MSNDPIN is not set
-CONFIG_SOUND_VIA82CXXX=m
  CONFIG_SOUND_TVMIXER=m
-CONFIG_SOUND_ALI5455=m
-CONFIG_SOUND_FORTE=m
-CONFIG_SOUND_RME96XX=m
-CONFIG_SOUND_AD1980=m
  
  #
  # USB support
@@ -2355,15 +2383,15 @@ CONFIG_USB_SL811_CS=m
  # USB Device Class drivers
  #
  # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
  
  #
-# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
  #
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
  
  #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# may also be needed; see USB_STORAGE Help for more information
  #
  CONFIG_USB_STORAGE=m
  # CONFIG_USB_STORAGE_DEBUG is not set
@@ -2375,7 +2403,6 @@ CONFIG_USB_STORAGE_USBAT=y
  CONFIG_USB_STORAGE_SDDR09=y
  CONFIG_USB_STORAGE_SDDR55=y
  CONFIG_USB_STORAGE_JUMPSHOT=y
-CONFIG_USB_STORAGE_ONETOUCH=y
  
  #
  # USB Input Devices
@@ -2460,6 +2487,7 @@ CONFIG_USB_USS720=m
  CONFIG_USB_SERIAL=m
  CONFIG_USB_SERIAL_GENERIC=y
  CONFIG_USB_SERIAL_AIRPRIME=m
+CONFIG_USB_SERIAL_ANYDATA=m
  CONFIG_USB_SERIAL_BELKIN=m
  CONFIG_USB_SERIAL_WHITEHEAT=m
  CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
@@ -2593,7 +2621,7 @@ CONFIG_JFS_STATISTICS=y
  CONFIG_FS_POSIX_ACL=y
  CONFIG_XFS_FS=m
  CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
  CONFIG_XFS_SECURITY=y
  CONFIG_XFS_POSIX_ACL=y
  CONFIG_XFS_RT=y
@@ -2660,6 +2688,7 @@ CONFIG_JFFS_PROC_FS=y
  CONFIG_JFFS2_FS=m
  CONFIG_JFFS2_FS_DEBUG=0
  CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_SUMMARY is not set
  # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
  CONFIG_JFFS2_ZLIB=y
  CONFIG_JFFS2_RTIME=y
@@ -2786,6 +2815,11 @@ CONFIG_NLS_KOI8_R=m
  CONFIG_NLS_KOI8_U=m
  CONFIG_NLS_UTF8=m
  
+#
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
  #
  # Kernel hacking
  #
@@ -2803,10 +2837,11 @@ CONFIG_DETECT_SOFTLOCKUP=y
  # CONFIG_DEBUG_BUGVERBOSE is not set
  # CONFIG_DEBUG_INFO is not set
  # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
  # CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
  # CONFIG_EARLY_PRINTK is not set
  # CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
  # CONFIG_DEBUG_STACK_USAGE is not set
  # CONFIG_DEBUG_PAGEALLOC is not set
  # CONFIG_4KSTACKS is not set
diff --git a/buildconfigs/linux-defconfig_xen_x86_64 b/buildconfigs/linux-defconfig_xen_x86_64

index e55c9bb1953b7d2a86dc22800d75ab78761b7694..efb137f4f90846fe299c7fb1b5faf87b5c13ae7c 100644 (file)
--- a/buildconfigs/linux-defconfig_xen_x86_64
+++ b/buildconfigs/linux-defconfig_xen_x86_64
@@ -1,7 +1,7 @@
  #
  # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen
-# Tue Jan 31 18:19:07 2006
+# Linux kernel version: 2.6.15-xen
+# Wed Feb  1 15:51:35 2006
  #
  CONFIG_X86_64=y
  CONFIG_64BIT=y
@@ -42,6 +42,7 @@ CONFIG_KOBJECT_UEVENT=y
  # CONFIG_IKCONFIG is not set
  # CONFIG_CPUSETS is not set
  CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
  # CONFIG_EMBEDDED is not set
  CONFIG_KALLSYMS=y
  # CONFIG_KALLSYMS_ALL is not set
@@ -71,6 +72,24 @@ CONFIG_MODULE_SRCVERSION_ALL=y
  CONFIG_KMOD=y
  CONFIG_STOP_MACHINE=y
  
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
  #
  # Processor type and features
  #
@@ -92,7 +111,6 @@ CONFIG_PREEMPT_NONE=y
  # CONFIG_PREEMPT_VOLUNTARY is not set
  # CONFIG_PREEMPT is not set
  CONFIG_PREEMPT_BKL=y
-# CONFIG_NUMA is not set
  CONFIG_ARCH_FLATMEM_ENABLE=y
  CONFIG_SELECT_MEMORY_MODEL=y
  CONFIG_FLATMEM_MANUAL=y
@@ -101,6 +119,7 @@ CONFIG_FLATMEM_MANUAL=y
  CONFIG_FLATMEM=y
  CONFIG_FLAT_NODE_MEM_MAP=y
  # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
  CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
  CONFIG_NR_CPUS=8
  # CONFIG_HOTPLUG_CPU is not set
@@ -259,6 +278,10 @@ CONFIG_IPV6_TUNNEL=m
  CONFIG_NETFILTER=y
  # CONFIG_NETFILTER_DEBUG is not set
  CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
  CONFIG_NETFILTER_NETLINK=m
  CONFIG_NETFILTER_NETLINK_QUEUE=m
  CONFIG_NETFILTER_NETLINK_LOG=m
@@ -444,10 +467,18 @@ CONFIG_IPDDP_DECAP=y
  CONFIG_NET_DIVERT=y
  # CONFIG_ECONET is not set
  CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
  CONFIG_NET_SCHED=y
  CONFIG_NET_SCH_CLK_JIFFIES=y
  # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
  # CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
  CONFIG_NET_SCH_CBQ=m
  CONFIG_NET_SCH_HTB=m
  CONFIG_NET_SCH_HFSC=m
@@ -461,8 +492,10 @@ CONFIG_NET_SCH_GRED=m
  CONFIG_NET_SCH_DSMARK=m
  CONFIG_NET_SCH_NETEM=m
  CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
  CONFIG_NET_CLS=y
  CONFIG_NET_CLS_BASIC=m
  CONFIG_NET_CLS_TCINDEX=m
@@ -471,7 +504,6 @@ CONFIG_NET_CLS_ROUTE=y
  CONFIG_NET_CLS_FW=m
  CONFIG_NET_CLS_U32=m
  CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_IND=y
  CONFIG_CLS_U32_MARK=y
  CONFIG_NET_CLS_RSVP=m
  CONFIG_NET_CLS_RSVP6=m
@@ -484,6 +516,8 @@ CONFIG_NET_EMATCH_META=m
  CONFIG_NET_EMATCH_TEXT=m
  # CONFIG_NET_CLS_ACT is not set
  CONFIG_NET_CLS_POLICE=y
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_ESTIMATOR=y
  
  #
  # Network testing
@@ -570,7 +604,6 @@ CONFIG_BT_HCIUSB_SCO=y
  CONFIG_BT_HCIUART=m
  CONFIG_BT_HCIUART_H4=y
  CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_BCSP_TXCRC=y
  CONFIG_BT_HCIBCM203X=m
  CONFIG_BT_HCIBPA10X=m
  CONFIG_BT_HCIBFUSB=m
@@ -621,6 +654,7 @@ CONFIG_FTL=m
  CONFIG_NFTL=m
  CONFIG_NFTL_RW=y
  CONFIG_INFTL=m
+CONFIG_RFD_FTL=m
  
  #
  # RAM/ROM/Flash chip drivers
@@ -702,6 +736,12 @@ CONFIG_MTD_NAND_IDS=m
  # CONFIG_MTD_NAND_DISKONCHIP is not set
  # CONFIG_MTD_NAND_NANDSIM is not set
  
+#
+# OneNAND Flash Device Drivers
+#
+CONFIG_MTD_ONENAND=m
+# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set
+
  #
  # Parallel port support
  #
@@ -767,18 +807,9 @@ CONFIG_BLK_DEV_RAM=y
  CONFIG_BLK_DEV_RAM_COUNT=16
  CONFIG_BLK_DEV_RAM_SIZE=16384
  CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
  CONFIG_CDROM_PKTCDVD=m
  CONFIG_CDROM_PKTCDVD_BUFFERS=8
  # CONFIG_CDROM_PKTCDVD_WCACHE is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
  CONFIG_ATA_OVER_ETH=m
  
  #
@@ -885,6 +916,7 @@ CONFIG_SCSI_SAS_ATTRS=m
  #
  # SCSI low-level drivers
  #
+CONFIG_ISCSI_TCP=m
  CONFIG_BLK_DEV_3W_XXXX_RAID=m
  CONFIG_SCSI_3W_9XXX=m
  CONFIG_SCSI_ACARD=m
@@ -914,10 +946,12 @@ CONFIG_SCSI_SATA_SVW=m
  CONFIG_SCSI_ATA_PIIX=y
  CONFIG_SCSI_SATA_MV=m
  CONFIG_SCSI_SATA_NV=m
-CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_PDC_ADMA=m
  CONFIG_SCSI_SATA_QSTOR=m
+CONFIG_SCSI_SATA_PROMISE=m
  CONFIG_SCSI_SATA_SX4=m
  CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIL24=m
  CONFIG_SCSI_SATA_SIS=m
  CONFIG_SCSI_SATA_ULI=m
  CONFIG_SCSI_SATA_VIA=m
@@ -925,7 +959,6 @@ CONFIG_SCSI_SATA_VITESSE=m
  CONFIG_SCSI_SATA_INTEL_COMBINED=y
  CONFIG_SCSI_BUSLOGIC=m
  # CONFIG_SCSI_OMIT_FLASHPOINT is not set
-# CONFIG_SCSI_CPQFCTS is not set
  # CONFIG_SCSI_DMX3191D is not set
  # CONFIG_SCSI_EATA is not set
  # CONFIG_SCSI_EATA_PIO is not set
@@ -944,10 +977,8 @@ CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
  CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
  # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
  # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
  # CONFIG_SCSI_QLOGIC_FC is not set
  CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_QLOGIC_1280_1040=y
  CONFIG_SCSI_QLA2XXX=y
  CONFIG_SCSI_QLA21XX=m
  CONFIG_SCSI_QLA22XX=m
@@ -1055,7 +1086,6 @@ CONFIG_TUN=m
  # PHY device support
  #
  CONFIG_PHYLIB=m
-CONFIG_PHYCONTROL=y
  
  #
  # MII PHY device drivers
@@ -1152,7 +1182,6 @@ CONFIG_IXGB=m
  CONFIG_IXGB_NAPI=y
  CONFIG_S2IO=m
  CONFIG_S2IO_NAPI=y
-# CONFIG_2BUFF_MODE is not set
  
  #
  # Token Ring devices
@@ -1208,6 +1237,7 @@ CONFIG_NET_WIRELESS=y
  #
  # ATM drivers
  #
+CONFIG_ATM_DUMMY=m
  CONFIG_ATM_TCP=m
  CONFIG_ATM_LANAI=m
  CONFIG_ATM_ENI=m
@@ -1239,6 +1269,7 @@ CONFIG_PPP_ASYNC=m
  CONFIG_PPP_SYNC_TTY=m
  CONFIG_PPP_DEFLATE=m
  # CONFIG_PPP_BSDCOMP is not set
+CONFIG_PPP_MPPE=m
  CONFIG_PPPOE=m
  CONFIG_PPPOATM=m
  CONFIG_SLIP=m
@@ -1466,7 +1497,6 @@ CONFIG_HW_CONSOLE=y
  #
  # Serial drivers
  #
-# CONFIG_SERIAL_8250 is not set
  
  #
  # Non-8250 serial port support
@@ -1558,6 +1588,7 @@ CONFIG_HANGCHECK_TIMER=m
  # TPM devices
  #
  # CONFIG_TCG_TPM is not set
+CONFIG_TELCLOCK=m
  
  #
  # I2C support
@@ -1611,6 +1642,7 @@ CONFIG_SENSORS_PCA9539=m
  CONFIG_SENSORS_PCF8591=m
  CONFIG_SENSORS_RTC8564=m
  CONFIG_SENSORS_MAX6875=m
+CONFIG_RTC_X1205_I2C=m
  # CONFIG_I2C_DEBUG_CORE is not set
  # CONFIG_I2C_DEBUG_ALGO is not set
  # CONFIG_I2C_DEBUG_BUS is not set
@@ -1692,6 +1724,7 @@ CONFIG_VIDEO_DEV=m
  # Video Adapters
  #
  CONFIG_VIDEO_BT848=m
+# CONFIG_VIDEO_BT848_DVB is not set
  CONFIG_VIDEO_SAA6588=m
  CONFIG_VIDEO_BWQCAM=m
  CONFIG_VIDEO_CQCAM=m
@@ -1711,14 +1744,20 @@ CONFIG_VIDEO_ZORAN_LML33=m
  CONFIG_VIDEO_ZORAN_LML33R10=m
  # CONFIG_VIDEO_ZR36120 is not set
  CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_SAA7134_ALSA=m
  CONFIG_VIDEO_SAA7134_DVB=m
+CONFIG_VIDEO_SAA7134_DVB_ALL_FRONTENDS=y
  CONFIG_VIDEO_MXB=m
  CONFIG_VIDEO_DPC=m
  CONFIG_VIDEO_HEXIUM_ORION=m
  CONFIG_VIDEO_HEXIUM_GEMINI=m
  CONFIG_VIDEO_CX88=m
  CONFIG_VIDEO_CX88_DVB=m
+CONFIG_VIDEO_CX88_DVB_ALL_FRONTENDS=y
+CONFIG_VIDEO_EM28XX=m
  CONFIG_VIDEO_OVCAMCHIP=m
+CONFIG_VIDEO_AUDIO_DECODER=m
+CONFIG_VIDEO_DECODER=m
  
  #
  # Radio Adapters
@@ -1831,6 +1870,7 @@ CONFIG_DVB_STV0297=m
  # ATSC (North American/Korean Terresterial DTV) frontends
  #
  CONFIG_DVB_NXT2002=m
+CONFIG_DVB_NXT200X=m
  CONFIG_DVB_OR51211=m
  CONFIG_DVB_OR51132=m
  CONFIG_DVB_BCM3510=m
@@ -1852,7 +1892,6 @@ CONFIG_FB=y
  CONFIG_FB_CFB_FILLRECT=y
  CONFIG_FB_CFB_COPYAREA=y
  CONFIG_FB_CFB_IMAGEBLIT=y
-CONFIG_FB_SOFT_CURSOR=y
  # CONFIG_FB_MACMODES is not set
  CONFIG_FB_MODE_HELPERS=y
  CONFIG_FB_TILEBLITTING=y
@@ -1866,6 +1905,7 @@ CONFIG_FB_VGA16=m
  CONFIG_FB_VESA=y
  CONFIG_VIDEO_SELECT=y
  # CONFIG_FB_HGA is not set
+# CONFIG_FB_S1D13XXX is not set
  # CONFIG_FB_NVIDIA is not set
  CONFIG_FB_RIVA=m
  # CONFIG_FB_RIVA_I2C is not set
@@ -1901,7 +1941,6 @@ CONFIG_FB_TRIDENT=m
  CONFIG_FB_TRIDENT_ACCEL=y
  # CONFIG_FB_PM3 is not set
  # CONFIG_FB_GEODE is not set
-# CONFIG_FB_S1D13XXX is not set
  # CONFIG_FB_VIRTUAL is not set
  
  #
@@ -1910,6 +1949,7 @@ CONFIG_FB_TRIDENT_ACCEL=y
  CONFIG_VGA_CONSOLE=y
  CONFIG_DUMMY_CONSOLE=y
  CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
  # CONFIG_FONTS is not set
  CONFIG_FONT_8x8=y
  CONFIG_FONT_8x16=y
@@ -1936,6 +1976,8 @@ CONFIG_SOUND=m
  # Advanced Linux Sound Architecture
  #
  CONFIG_SND=m
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_AC97_BUS=m
  CONFIG_SND_TIMER=m
  CONFIG_SND_PCM=m
  CONFIG_SND_HWDEP=m
@@ -1963,8 +2005,6 @@ CONFIG_SND_VIRMIDI=m
  CONFIG_SND_MTPAV=m
  # CONFIG_SND_SERIAL_U16550 is not set
  CONFIG_SND_MPU401=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
  
  #
  # PCI devices
@@ -2059,15 +2099,15 @@ CONFIG_USB_SL811_HCD=m
  # USB Device Class drivers
  #
  # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
  
  #
-# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
  #
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
  
  #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# may also be needed; see USB_STORAGE Help for more information
  #
  CONFIG_USB_STORAGE=m
  # CONFIG_USB_STORAGE_DEBUG is not set
@@ -2079,7 +2119,6 @@ CONFIG_USB_STORAGE_USBAT=y
  CONFIG_USB_STORAGE_SDDR09=y
  CONFIG_USB_STORAGE_SDDR55=y
  CONFIG_USB_STORAGE_JUMPSHOT=y
-CONFIG_USB_STORAGE_ONETOUCH=y
  
  #
  # USB Input Devices
@@ -2161,6 +2200,7 @@ CONFIG_USB_USS720=m
  CONFIG_USB_SERIAL=m
  CONFIG_USB_SERIAL_GENERIC=y
  CONFIG_USB_SERIAL_AIRPRIME=m
+CONFIG_USB_SERIAL_ANYDATA=m
  CONFIG_USB_SERIAL_BELKIN=m
  CONFIG_USB_SERIAL_WHITEHEAT=m
  CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
@@ -2252,6 +2292,7 @@ CONFIG_INFINIBAND_MTHCA=m
  # CONFIG_INFINIBAND_MTHCA_DEBUG is not set
  CONFIG_INFINIBAND_IPOIB=m
  # CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
  
  #
  # SN Devices
@@ -2293,7 +2334,7 @@ CONFIG_JFS_SECURITY=y
  CONFIG_FS_POSIX_ACL=y
  CONFIG_XFS_FS=m
  CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
  CONFIG_XFS_SECURITY=y
  CONFIG_XFS_POSIX_ACL=y
  # CONFIG_XFS_RT is not set
@@ -2355,6 +2396,7 @@ CONFIG_EFS_FS=m
  CONFIG_JFFS2_FS=m
  CONFIG_JFFS2_FS_DEBUG=0
  CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_SUMMARY is not set
  # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
  CONFIG_JFFS2_ZLIB=y
  CONFIG_JFFS2_RTIME=y
@@ -2474,9 +2516,10 @@ CONFIG_NLS_KOI8_U=m
  CONFIG_NLS_UTF8=m
  
  #
-# Profiling support
+# Instrumentation Support
  #
  # CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
  
  #
  # Kernel hacking
@@ -2492,9 +2535,10 @@ CONFIG_DETECT_SOFTLOCKUP=y
  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
  # CONFIG_DEBUG_KOBJECT is not set
  # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
  # CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
  # CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
  
  #
  # Security options
diff --git a/buildconfigs/mk.linux-2.6-xen b/buildconfigs/mk.linux-2.6-xen

index 257fd6740065e04877d5886bf2005ecfd178e44c..250a0a4c5f0b441a1db8d414ff6ca36c47bc9020 100644 (file)
--- a/buildconfigs/mk.linux-2.6-xen
+++ b/buildconfigs/mk.linux-2.6-xen
@@ -2,7 +2,7 @@
  OS           = linux
  
  LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.14
+LINUX_VER    = 2.6.15
  
  EXTRAVERSION ?= xen
  
diff --git a/linux-2.6-xen-sparse/arch/i386/Kconfig b/linux-2.6-xen-sparse/arch/i386/Kconfig

index 0f9682546640b4febe38a8c30ae77d66d845edcb..5ad468597665aadc5afe34a9e5c0fbc4f238ff5d 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig
@@ -5,7 +5,7 @@
  
  mainmenu "Linux Kernel Configuration"
  
-config X86
+config X86_32
         bool
         default y
         help
@@ -18,6 +18,10 @@ config SEMAPHORE_SLEEPERS
         bool
         default y
  
+config X86
+       bool
+       default y
+
  config MMU
         bool
         default y
@@ -160,304 +164,7 @@ config ES7000_CLUSTERED_APIC
         default y
         depends on SMP && X86_ES7000 && MPENTIUMIII
  
-if !X86_ELAN
-
-choice
-       prompt "Processor family"
-       default M686
-
-config M386
-       bool "386"
-       ---help---
-         This is the processor type of your CPU. This information is used for
-         optimizing purposes. In order to compile a kernel that can run on
-         all x86 CPU types (albeit not optimally fast), you can specify
-         "386" here.
-
-         The kernel will not necessarily run on earlier architectures than
-         the one you have chosen, e.g. a Pentium optimized kernel will run on
-         a PPro, but not necessarily on a i486.
-
-         Here are the settings recommended for greatest speed:
-         - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
-         486DLC/DLC2, UMC 486SX-S and NexGen Nx586.  Only "386" kernels
-         will run on a 386 class machine.
-         - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
-         SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
-         - "586" for generic Pentium CPUs lacking the TSC
-         (time stamp counter) register.
-         - "Pentium-Classic" for the Intel Pentium.
-         - "Pentium-MMX" for the Intel Pentium MMX.
-         - "Pentium-Pro" for the Intel Pentium Pro.
-         - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
-         - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
-         - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
-         - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
-         - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
-         - "Crusoe" for the Transmeta Crusoe series.
-         - "Efficeon" for the Transmeta Efficeon series.
-         - "Winchip-C6" for original IDT Winchip.
-         - "Winchip-2" for IDT Winchip 2.
-         - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
-         - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
-         - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
-         - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
-
-         If you don't know what to do, choose "386".
-
-config M486
-       bool "486"
-       help
-         Select this for a 486 series processor, either Intel or one of the
-         compatible processors from AMD, Cyrix, IBM, or Intel.  Includes DX,
-         DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
-         U5S.
-
-config M586
-       bool "586/K5/5x86/6x86/6x86MX"
-       help
-         Select this for an 586 or 686 series processor such as the AMD K5,
-         the Cyrix 5x86, 6x86 and 6x86MX.  This choice does not
-         assume the RDTSC (Read Time Stamp Counter) instruction.
-
-config M586TSC
-       bool "Pentium-Classic"
-       help
-         Select this for a Pentium Classic processor with the RDTSC (Read
-         Time Stamp Counter) instruction for benchmarking.
-
-config M586MMX
-       bool "Pentium-MMX"
-       help
-         Select this for a Pentium with the MMX graphics/multimedia
-         extended instructions.
-
-config M686
-       bool "Pentium-Pro"
-       help
-         Select this for Intel Pentium Pro chips.  This enables the use of
-         Pentium Pro extended instructions, and disables the init-time guard
-         against the f00f bug found in earlier Pentiums.
-
-config MPENTIUMII
-       bool "Pentium-II/Celeron(pre-Coppermine)"
-       help
-         Select this for Intel chips based on the Pentium-II and
-         pre-Coppermine Celeron core.  This option enables an unaligned
-         copy optimization, compiles the kernel with optimization flags
-         tailored for the chip, and applies any applicable Pentium Pro
-         optimizations.
-
-config MPENTIUMIII
-       bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
-       help
-         Select this for Intel chips based on the Pentium-III and
-         Celeron-Coppermine core.  This option enables use of some
-         extended prefetch instructions in addition to the Pentium II
-         extensions.
-
-config MPENTIUMM
-       bool "Pentium M"
-       help
-         Select this for Intel Pentium M (not Pentium-4 M)
-         notebook chips.
-
-config MPENTIUM4
-       bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
-       help
-         Select this for Intel Pentium 4 chips.  This includes the
-         Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
-         (not Pentium M) chips.  This option enables compile flags
-         optimized for the chip, uses the correct cache shift, and
-         applies any applicable Pentium III optimizations.
-
-config MK6
-       bool "K6/K6-II/K6-III"
-       help
-         Select this for an AMD K6-family processor.  Enables use of
-         some extended instructions, and passes appropriate optimization
-         flags to GCC.
-
-config MK7
-       bool "Athlon/Duron/K7"
-       help
-         Select this for an AMD Athlon K7-family processor.  Enables use of
-         some extended instructions, and passes appropriate optimization
-         flags to GCC.
-
-config MK8
-       bool "Opteron/Athlon64/Hammer/K8"
-       help
-         Select this for an AMD Opteron or Athlon64 Hammer-family processor.  Enables
-         use of some extended instructions, and passes appropriate optimization
-         flags to GCC.
-
-config MCRUSOE
-       bool "Crusoe"
-       help
-         Select this for a Transmeta Crusoe processor.  Treats the processor
-         like a 586 with TSC, and sets some GCC optimization flags (like a
-         Pentium Pro with no alignment requirements).
-
-config MEFFICEON
-       bool "Efficeon"
-       help
-         Select this for a Transmeta Efficeon processor.
-
-config MWINCHIPC6
-       bool "Winchip-C6"
-       help
-         Select this for an IDT Winchip C6 chip.  Linux and GCC
-         treat this chip as a 586TSC with some extended instructions
-         and alignment requirements.
-
-config MWINCHIP2
-       bool "Winchip-2"
-       help
-         Select this for an IDT Winchip-2.  Linux and GCC
-         treat this chip as a 586TSC with some extended instructions
-         and alignment requirements.
-
-config MWINCHIP3D
-       bool "Winchip-2A/Winchip-3"
-       help
-         Select this for an IDT Winchip-2A or 3.  Linux and GCC
-         treat this chip as a 586TSC with some extended instructions
-         and alignment reqirements.  Also enable out of order memory
-         stores for this CPU, which can increase performance of some
-         operations.
-
-config MGEODEGX1
-       bool "GeodeGX1"
-       help
-         Select this for a Geode GX1 (Cyrix MediaGX) chip.
-
-config MCYRIXIII
-       bool "CyrixIII/VIA-C3"
-       help
-         Select this for a Cyrix III or C3 chip.  Presently Linux and GCC
-         treat this chip as a generic 586. Whilst the CPU is 686 class,
-         it lacks the cmov extension which gcc assumes is present when
-         generating 686 code.
-         Note that Nehemiah (Model 9) and above will not boot with this
-         kernel due to them lacking the 3DNow! instructions used in earlier
-         incarnations of the CPU.
-
-config MVIAC3_2
-       bool "VIA C3-2 (Nehemiah)"
-       help
-         Select this for a VIA C3 "Nehemiah". Selecting this enables usage
-         of SSE and tells gcc to treat the CPU as a 686.
-         Note, this kernel will not boot on older (pre model 9) C3s.
-
-endchoice
-
-config X86_GENERIC
-       bool "Generic x86 support"
-       help
-         Instead of just including optimizations for the selected
-         x86 variant (e.g. PII, Crusoe or Athlon), include some more
-         generic optimizations as well. This will make the kernel
-         perform better on x86 CPUs other than that selected.
-
-         This is really intended for distributors who need more
-         generic optimizations.
-
-endif
-
-#
-# Define implied options from the CPU selection here
-#
-config X86_CMPXCHG
-       bool
-       depends on !M386
-       default y
-
-config X86_XADD
-       bool
-       depends on !M386
-       default y
-
-config X86_L1_CACHE_SHIFT
-       int
-       default "7" if MPENTIUM4 || X86_GENERIC
-       default "4" if X86_ELAN || M486 || M386
-       default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
-       default "6" if MK7 || MK8 || MPENTIUMM
-
-config RWSEM_GENERIC_SPINLOCK
-       bool
-       depends on M386
-       default y
-
-config RWSEM_XCHGADD_ALGORITHM
-       bool
-       depends on !M386
-       default y
-
-config GENERIC_CALIBRATE_DELAY
-       bool
-       default y
-
-config X86_PPRO_FENCE
-       bool
-       depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
-       default y
-
-config X86_F00F_BUG
-       bool
-       depends on M586MMX || M586TSC || M586 || M486 || M386
-       default y
-
-config X86_WP_WORKS_OK
-       bool
-       depends on !M386
-       default y
-
-config X86_INVLPG
-       bool
-       depends on !M386
-       default y
-
-config X86_BSWAP
-       bool
-       depends on !M386
-       default y
-
-config X86_POPAD_OK
-       bool
-       depends on !M386
-       default y
-
-config X86_ALIGNMENT_16
-       bool
-       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
-       default y
-
-config X86_GOOD_APIC
-       bool
-       depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
-       default y
-
-config X86_INTEL_USERCOPY
-       bool
-       depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
-       default y
-
-config X86_USE_PPRO_CHECKSUM
-       bool
-       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON
-       default y
-
-config X86_USE_3DNOW
-       bool
-       depends on MCYRIXIII || MK7
-       default y
-
-config X86_OOSTORE
-       bool
-       depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
-       default y
+source "arch/i386/Kconfig.cpu"
  
  config HPET_TIMER
         bool "HPET Timer Support"
@@ -1041,7 +748,7 @@ depends on PM && !(X86_VISWS || X86_XEN)
  
  config APM
         tristate "APM (Advanced Power Management) BIOS support"
-       depends on PM
+       depends on PM && PM_LEGACY
         ---help---
           APM is a BIOS specification for saving power using several different
           techniques. This is mostly useful for battery powered laptops with
@@ -1333,10 +1040,23 @@ source "drivers/Kconfig"
  
  source "fs/Kconfig"
  
+menu "Instrumentation Support"
+       depends on EXPERIMENTAL
+
  if !X86_XEN
  source "arch/i386/oprofile/Kconfig"
  endif
  
+config KPROBES
+       bool "Kprobes (EXPERIMENTAL)"
+       help
+         Kprobes allows you to trap at almost any kernel address and
+         execute a callback function.  register_kprobe() establishes
+         a probepoint and specifies the callback.  Kprobes is useful
+         for kernel debugging, non-intrusive instrumentation and testing.
+         If in doubt, say "N".
+endmenu
+
  source "arch/i386/Kconfig.debug"
  
  source "security/Kconfig"
@@ -1382,8 +1102,3 @@ config X86_TRAMPOLINE
         bool
         depends on X86_SMP || (X86_VOYAGER && SMP)
         default y
-
-config PC
-       bool
-       depends on X86 && !EMBEDDED
-       default y
diff --git a/linux-2.6-xen-sparse/arch/i386/Makefile b/linux-2.6-xen-sparse/arch/i386/Makefile

index 50e75c29aa4733c22cce153d6b28cef39dd54201..1c31ba91a40e9efce87a918b3b45d1ec266b5104 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/Makefile
@@ -34,35 +34,8 @@ CFLAGS += -pipe -msoft-float
  # prevent gcc from keeping the stack 16 byte aligned
  CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
  
-align := $(cc-option-align)
-cflags-$(CONFIG_M386)          += -march=i386
-cflags-$(CONFIG_M486)          += -march=i486
-cflags-$(CONFIG_M586)          += -march=i586
-cflags-$(CONFIG_M586TSC)       += -march=i586
-cflags-$(CONFIG_M586MMX)       += $(call cc-option,-march=pentium-mmx,-march=i586)
-cflags-$(CONFIG_M686)          += -march=i686
-cflags-$(CONFIG_MPENTIUMII)    += -march=i686 $(call cc-option,-mtune=pentium2)
-cflags-$(CONFIG_MPENTIUMIII)   += -march=i686 $(call cc-option,-mtune=pentium3)
-cflags-$(CONFIG_MPENTIUMM)     += -march=i686 $(call cc-option,-mtune=pentium3)
-cflags-$(CONFIG_MPENTIUM4)     += -march=i686 $(call cc-option,-mtune=pentium4)
-cflags-$(CONFIG_MK6)           += -march=k6
-# Please note, that patches that add -march=athlon-xp and friends are pointless.
-# They make zero difference whatsosever to performance at this time.
-cflags-$(CONFIG_MK7)           += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
-cflags-$(CONFIG_MK8)           += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
-cflags-$(CONFIG_MCRUSOE)       += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MEFFICEON)     += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MWINCHIPC6)    += $(call cc-option,-march=winchip-c6,-march=i586)
-cflags-$(CONFIG_MWINCHIP2)     += $(call cc-option,-march=winchip2,-march=i586)
-cflags-$(CONFIG_MWINCHIP3D)    += $(call cc-option,-march=winchip2,-march=i586)
-cflags-$(CONFIG_MCYRIXIII)     += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MVIAC3_2)      += $(call cc-option,-march=c3-2,-march=i686)
-
-# AMD Elan support
-cflags-$(CONFIG_X86_ELAN)      += -march=i486
-
-# Geode GX1 support
-cflags-$(CONFIG_MGEODEGX1)             += $(call cc-option,-march=pentium-mmx,-march=i486)
+# CPU-specific tuning. Anything which can be shared with UML should go here.
+include $(srctree)/arch/i386/Makefile.cpu
  
  # -mregparm=3 works ok on gcc-3.0 and later
  #
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile

index 53d8c0bbddde15d75196275d71c524764e6110ac..1914055a26824471883b126dde38cdf5ddf9e2b0 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
@@ -86,6 +86,7 @@ ifdef CONFIG_XEN
  include $(srctree)/scripts/Makefile.xen
  
  obj-y += fixup.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
  n-obj-xen := i8259.o doublefault.o timers/ reboot.o smpboot.o trampoline.o
  
  obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c

index 2d92b3a94be43998cce90c5a5d793ff529a5dceb..cb942706d749fea2bf52e683cda4ed5a1473d940 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c
@@ -36,23 +36,17 @@
  #include <asm/apic.h>
  #include <asm/io.h>
  #include <asm/mpspec.h>
-#ifdef CONFIG_XEN
-#include <asm/fixmap.h>
-#endif
  
  #ifdef CONFIG_X86_64
  
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
  extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void)
-{
-       return 0;
-}
  
+extern int gsi_irq_sharing(int gsi);
  #include <asm/proto.h>
  
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+
+
  #else                          /* X86 */
  
  #ifdef CONFIG_X86_LOCAL_APIC
@@ -60,6 +54,8 @@ static inline int ioapic_setup_disabled(void)
  #include <mach_mpparse.h>
  #endif                         /* CONFIG_X86_LOCAL_APIC */
  
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
  #endif                         /* X86 */
  
  #define BAD_MADT_ENTRY(entry, end) (                                       \
@@ -138,7 +134,7 @@ char *__acpi_map_table(unsigned long phys, unsigned long size)
         int idx;
  
  #ifndef CONFIG_XEN
-       if (phys + size < 8 * 1024 * 1024) 
+       if (phys + size < 8 * 1024 * 1024)
                 return __va(phys);
  #endif
  
@@ -254,9 +250,7 @@ acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
  
         acpi_table_print_madt_entry(header);
  
-       /* no utility in registering a disabled processor */
-       if (processor->flags.enabled == 0)
-               return 0;
+       /* Register even disabled CPUs for cpu hotplug */
  
         x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
  
@@ -464,7 +458,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
                 *irq = IO_APIC_VECTOR(gsi);
         else
  #endif
-               *irq = gsi;
+               *irq = gsi_irq_sharing(gsi);
         return 0;
  }
  
@@ -538,7 +532,7 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
  EXPORT_SYMBOL(acpi_unregister_ioapic);
  
  static unsigned long __init
-acpi_scan_rsdp (unsigned long start, unsigned long length)
+acpi_scan_rsdp(unsigned long start, unsigned long length)
  {
         unsigned long offset = 0;
         unsigned long sig_len = sizeof("RSD PTR ") - 1;
@@ -647,6 +641,13 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
                         return 0;
  
                 pmtmr_ioport = fadt->xpm_tmr_blk.address;
+               /*
+                * "X" fields are optional extensions to the original V1.0
+                * fields, so we must selectively expand V1.0 fields if the
+                * corresponding X field is zero.
+                */
+               if (!pmtmr_ioport)
+                       pmtmr_ioport = fadt->V1_pm_tmr_blk;
         } else {
                 /* FADT rev. 1 */
                 pmtmr_ioport = fadt->V1_pm_tmr_blk;
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/apm.c b/linux-2.6-xen-sparse/arch/i386/kernel/apm.c

deleted file mode 100644 (file)

index cb1cceb..0000000
--- a/linux-2.6-xen-sparse/arch/i386/kernel/apm.c
+++ /dev/null
@@ -1,2420 +0,0 @@
-/* -*- linux-c -*-
- * APM BIOS driver for Linux
- * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au)
- *
- * Initial development of this driver was funded by NEC Australia P/L
- *     and NEC Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * October 1995, Rik Faith (faith@cs.unc.edu):
- *    Minor enhancements and updates (to the patch set) for 1.3.x
- *    Documentation
- * January 1996, Rik Faith (faith@cs.unc.edu):
- *    Make /proc/apm easy to format (bump driver version)
- * March 1996, Rik Faith (faith@cs.unc.edu):
- *    Prohibit APM BIOS calls unless apm_enabled.
- *    (Thanks to Ulrich Windl <Ulrich.Windl@rz.uni-regensburg.de>)
- * April 1996, Stephen Rothwell (sfr@canb.auug.org.au)
- *    Version 1.0 and 1.1
- * May 1996, Version 1.2
- * Feb 1998, Version 1.3
- * Feb 1998, Version 1.4
- * Aug 1998, Version 1.5
- * Sep 1998, Version 1.6
- * Nov 1998, Version 1.7
- * Jan 1999, Version 1.8
- * Jan 1999, Version 1.9
- * Oct 1999, Version 1.10
- * Nov 1999, Version 1.11
- * Jan 2000, Version 1.12
- * Feb 2000, Version 1.13
- * Nov 2000, Version 1.14
- * Oct 2001, Version 1.15
- * Jan 2002, Version 1.16
- * Oct 2002, Version 1.16ac
- *
- * History:
- *    0.6b: first version in official kernel, Linux 1.3.46
- *    0.7: changed /proc/apm format, Linux 1.3.58
- *    0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59
- *    0.9: only call bios if bios is present, Linux 1.3.72
- *    1.0: use fixed device number, consolidate /proc/apm into this file,
- *         Linux 1.3.85
- *    1.1: support user-space standby and suspend, power off after system
- *         halted, Linux 1.3.98
- *    1.2: When resetting RTC after resume, take care so that the time
- *         is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth
- *         <jtoth@princeton.edu>); improve interaction between
- *         screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4
- *    1.2a:Simple change to stop mysterious bug reports with SMP also added
- *        levels to the printk calls. APM is not defined for SMP machines.
- *         The new replacment for it is, but Linux doesn't yet support this.
- *         Alan Cox Linux 2.1.55
- *    1.3: Set up a valid data descriptor 0x40 for buggy BIOS's
- *    1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by
- *         Dean Gaudet <dgaudet@arctic.org>.
- *         C. Scott Ananian <cananian@alumni.princeton.edu> Linux 2.1.87
- *    1.5: Fix segment register reloading (in case of bad segments saved
- *         across BIOS call).
- *         Stephen Rothwell
- *    1.6: Cope with complier/assembler differences.
- *         Only try to turn off the first display device.
- *         Fix OOPS at power off with no APM BIOS by Jan Echternach
- *                   <echter@informatik.uni-rostock.de>
- *         Stephen Rothwell
- *    1.7: Modify driver's cached copy of the disabled/disengaged flags
- *         to reflect current state of APM BIOS.
- *         Chris Rankin <rankinc@bellsouth.net>
- *         Reset interrupt 0 timer to 100Hz after suspend
- *         Chad Miller <cmiller@surfsouth.com>
- *         Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE
- *         Richard Gooch <rgooch@atnf.csiro.au>
- *         Allow boot time disabling of APM
- *         Make boot messages far less verbose by default
- *         Make asm safer
- *         Stephen Rothwell
- *    1.8: Add CONFIG_APM_RTC_IS_GMT
- *         Richard Gooch <rgooch@atnf.csiro.au>
- *         change APM_NOINTS to CONFIG_APM_ALLOW_INTS
- *         remove dependency on CONFIG_PROC_FS
- *         Stephen Rothwell
- *    1.9: Fix small typo.  <laslo@wodip.opole.pl>
- *         Try to cope with BIOS's that need to have all display
- *         devices blanked and not just the first one.
- *         Ross Paterson <ross@soi.city.ac.uk>
- *         Fix segment limit setting it has always been wrong as
- *         the segments needed to have byte granularity.
- *         Mark a few things __init.
- *         Add hack to allow power off of SMP systems by popular request.
- *         Use CONFIG_SMP instead of __SMP__
- *         Ignore BOUNCES for three seconds.
- *         Stephen Rothwell
- *   1.10: Fix for Thinkpad return code.
- *         Merge 2.2 and 2.3 drivers.
- *         Remove APM dependencies in arch/i386/kernel/process.c
- *         Remove APM dependencies in drivers/char/sysrq.c
- *         Reset time across standby.
- *         Allow more inititialisation on SMP.
- *         Remove CONFIG_APM_POWER_OFF and make it boot time
- *         configurable (default on).
- *         Make debug only a boot time parameter (remove APM_DEBUG).
- *         Try to blank all devices on any error.
- *   1.11: Remove APM dependencies in drivers/char/console.c
- *         Check nr_running to detect if we are idle (from
- *         Borislav Deianov <borislav@lix.polytechnique.fr>)
- *         Fix for bioses that don't zero the top part of the
- *         entrypoint offset (Mario Sitta <sitta@al.unipmn.it>)
- *         (reported by Panos Katsaloulis <teras@writeme.com>).
- *         Real mode power off patch (Walter Hofmann
- *         <Walter.Hofmann@physik.stud.uni-erlangen.de>).
- *   1.12: Remove CONFIG_SMP as the compiler will optimize
- *         the code away anyway (smp_num_cpus == 1 in UP)
- *         noted by Artur Skawina <skawina@geocities.com>.
- *         Make power off under SMP work again.
- *         Fix thinko with initial engaging of BIOS.
- *         Make sure power off only happens on CPU 0
- *         (Paul "Rusty" Russell <rusty@rustcorp.com.au>).
- *         Do error notification to user mode if BIOS calls fail.
- *         Move entrypoint offset fix to ...boot/setup.S
- *         where it belongs (Cosmos <gis88564@cis.nctu.edu.tw>).
- *         Remove smp-power-off. SMP users must now specify
- *         "apm=power-off" on the kernel command line. Suggested
- *         by Jim Avera <jima@hal.com>, modified by Alan Cox
- *         <alan@lxorguk.ukuu.org.uk>.
- *         Register the /proc/apm entry even on SMP so that
- *         scripts that check for it before doing power off
- *         work (Jim Avera <jima@hal.com>).
- *   1.13: Changes for new pm_ interfaces (Andy Henroid
- *         <andy_henroid@yahoo.com>).
- *         Modularize the code.
- *         Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS
- *         is now the way life works).
- *         Fix thinko in suspend() (wrong return).
- *         Notify drivers on critical suspend.
- *         Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz>
- *         modified by sfr).
- *         Disable interrupts while we are suspended (Andy Henroid
- *         <andy_henroid@yahoo.com> fixed by sfr).
- *         Make power off work on SMP again (Tony Hoyle
- *         <tmh@magenta-logic.com> and <zlatko@iskon.hr>) modified by sfr.
- *         Remove CONFIG_APM_SUSPEND_BOUNCE.  The bounce ignore
- *         interval is now configurable.
- *   1.14: Make connection version persist across module unload/load.
- *         Enable and engage power management earlier.
- *         Disengage power management on module unload.
- *         Changed to use the sysrq-register hack for registering the
- *         power off function called by magic sysrq based upon discussions
- *         in irc://irc.openprojects.net/#kernelnewbies
- *         (Crutcher Dunnavant <crutcher+kernel@datastacks.com>).
- *         Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable.
- *         (Arjan van de Ven <arjanv@redhat.com>) modified by sfr.
- *         Work around byte swap bug in one of the Vaio's BIOS's
- *         (Marc Boucher <marc@mbsi.ca>).
- *         Exposed the disable flag to dmi so that we can handle known
- *         broken APM (Alan Cox <alan@redhat.com>).
- *   1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
- *         calling it - instead idle. (Alan Cox <alan@redhat.com>)
- *         If an APM idle fails log it and idle sensibly
- *   1.15: Don't queue events to clients who open the device O_WRONLY.
- *         Don't expect replies from clients who open the device O_RDONLY.
- *         (Idea from Thomas Hood)
- *         Minor waitqueue cleanups. (John Fremlin <chief@bandits.org>)
- *   1.16: Fix idle calling. (Andreas Steinmetz <ast@domdv.de> et al.)
- *         Notify listeners of standby or suspend events before notifying
- *         drivers. Return EBUSY to ioctl() if suspend is rejected.
- *         (Russell King <rmk@arm.linux.org.uk> and Thomas Hood)
- *         Ignore first resume after we generate our own resume event
- *         after a suspend (Thomas Hood)
- *         Daemonize now gets rid of our controlling terminal (sfr).
- *         CONFIG_APM_CPU_IDLE now just affects the default value of
- *         idle_threshold (sfr).
- *         Change name of kernel apm daemon (as it no longer idles) (sfr).
- *   1.16ac: Fix up SMP support somewhat. You can now force SMP on and we
- *        make _all_ APM calls on the CPU#0. Fix unsafe sign bug.
- *        TODO: determine if its "boot CPU" or "CPU0" we want to lock to.
- *
- * APM 1.1 Reference:
- *
- *   Intel Corporation, Microsoft Corporation. Advanced Power Management
- *   (APM) BIOS Interface Specification, Revision 1.1, September 1993.
- *   Intel Order Number 241704-001.  Microsoft Part Number 781-110-X01.
- *
- * [This document is available free from Intel by calling 800.628.8686 (fax
- * 916.356.6100) or 800.548.4725; or via anonymous ftp from
- * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc.  It is also
- * available from Microsoft by calling 206.882.8080.]
- *
- * APM 1.2 Reference:
- *   Intel Corporation, Microsoft Corporation. Advanced Power Management
- *   (APM) BIOS Interface Specification, Revision 1.2, February 1996.
- *
- * [This document is available from Microsoft at:
- *    http://www.microsoft.com/hwdev/busbios/amp_12.htm]
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/poll.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/timer.h>
-#include <linux/fcntl.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/proc_fs.h>
-#include <linux/miscdevice.h>
-#include <linux/apm_bios.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/sched.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/dmi.h>
-#include <linux/suspend.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-extern unsigned long get_cmos_time(void);
-extern void machine_real_restart(unsigned char *, int);
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-extern int (*console_blank_hook)(int);
-#endif
-
-/*
- * The apm_bios device is one of the misc char devices.
- * This is its minor number.
- */
-#define        APM_MINOR_DEV   134
-
-/*
- * See Documentation/Config.help for the configuration options.
- *
- * Various options can be changed at boot time as follows:
- * (We allow underscores for compatibility with the modules code)
- *     apm=on/off                      enable/disable APM
- *         [no-]allow[-_]ints          allow interrupts during BIOS calls
- *         [no-]broken[-_]psr          BIOS has a broken GetPowerStatus call
- *         [no-]realmode[-_]power[-_]off       switch to real mode before
- *                                             powering off
- *         [no-]debug                  log some debugging messages
- *         [no-]power[-_]off           power off on shutdown
- *         [no-]smp                    Use apm even on an SMP box
- *         bounce[-_]interval=<n>      number of ticks to ignore suspend
- *                                     bounces
- *          idle[-_]threshold=<n>       System idle percentage above which to
- *                                      make APM BIOS idle calls. Set it to
- *                                      100 to disable.
- *          idle[-_]period=<n>          Period (in 1/100s of a second) over
- *                                      which the idle percentage is
- *                                      calculated.
- */
-
-/* KNOWN PROBLEM MACHINES:
- *
- * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant
- *                         [Confirmed by TI representative]
- * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification
- *                    [Confirmed by BIOS disassembly]
- *                    [This may work now ...]
- * P: Toshiba 1950S: battery life information only gets updated after resume
- * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking
- *     broken in BIOS [Reported by Garst R. Reese <reese@isn.net>]
- * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP
- *     Neale Banks <neale@lowendale.com.au> December 2000
- *
- * Legend: U = unusable with APM patches
- *         P = partially usable with APM patches
- */
-
-/*
- * Define as 1 to make the driver always call the APM BIOS busy
- * routine even if the clock was not reported as slowed by the
- * idle routine.  Otherwise, define as 0.
- */
-#define ALWAYS_CALL_BUSY   1
-
-/*
- * Define to make the APM BIOS calls zero all data segment registers (so
- * that an incorrect BIOS implementation will cause a kernel panic if it
- * tries to write to arbitrary memory).
- */
-#define APM_ZERO_SEGS
-
-#include "apm.h"
-
-/*
- * Define to make all _set_limit calls use 64k limits.  The APM 1.1 BIOS is
- * supposed to provide limit information that it recognizes.  Many machines
- * do this correctly, but many others do not restrict themselves to their
- * claimed limit.  When this happens, they will cause a segmentation
- * violation in the kernel at boot time.  Most BIOS's, however, will
- * respect a 64k limit, so we use that.  If you want to be pedantic and
- * hold your BIOS to its claims, then undefine this.
- */
-#define APM_RELAX_SEGMENTS
-
-/*
- * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
- * This patched by Chad Miller <cmiller@surfsouth.com>, original code by
- * David Chen <chen@ctpa04.mit.edu>
- */
-#undef INIT_TIMER_AFTER_SUSPEND
-
-#ifdef INIT_TIMER_AFTER_SUSPEND
-#include <linux/timex.h>
-#include <asm/io.h>
-#include <linux/delay.h>
-#endif
-
-/*
- * Need to poll the APM BIOS every second
- */
-#define APM_CHECK_TIMEOUT      (HZ)
-
-/*
- * Ignore suspend events for this amount of time after a resume
- */
-#define DEFAULT_BOUNCE_INTERVAL                (3 * HZ)
-
-/*
- * Maximum number of events stored
- */
-#define APM_MAX_EVENTS         20
-
-/*
- * The per-file APM data
- */
-struct apm_user {
-       int             magic;
-       struct apm_user *       next;
-       unsigned int    suser: 1;
-       unsigned int    writer: 1;
-       unsigned int    reader: 1;
-       unsigned int    suspend_wait: 1;
-       int             suspend_result;
-       int             suspends_pending;
-       int             standbys_pending;
-       int             suspends_read;
-       int             standbys_read;
-       int             event_head;
-       int             event_tail;
-       apm_event_t     events[APM_MAX_EVENTS];
-};
-
-/*
- * The magic number in apm_user
- */
-#define APM_BIOS_MAGIC         0x4101
-
-/*
- * idle percentage above which bios idle calls are done
- */
-#ifdef CONFIG_APM_CPU_IDLE
-#define DEFAULT_IDLE_THRESHOLD 95
-#else
-#define DEFAULT_IDLE_THRESHOLD 100
-#endif
-#define DEFAULT_IDLE_PERIOD    (100 / 3)
-
-/*
- * Local variables
- */
-static struct {
-       unsigned long   offset;
-       unsigned short  segment;
-}                              apm_bios_entry;
-static int                     clock_slowed;
-static int                     idle_threshold = DEFAULT_IDLE_THRESHOLD;
-static int                     idle_period = DEFAULT_IDLE_PERIOD;
-static int                     set_pm_idle;
-static int                     suspends_pending;
-static int                     standbys_pending;
-static int                     ignore_sys_suspend;
-static int                     ignore_normal_resume;
-static int                     bounce_interval = DEFAULT_BOUNCE_INTERVAL;
-
-#ifdef CONFIG_APM_RTC_IS_GMT
-#      define  clock_cmos_diff 0
-#      define  got_clock_diff  1
-#else
-static long                    clock_cmos_diff;
-static int                     got_clock_diff;
-#endif
-static int                     debug;
-static int                     smp;
-static int                     apm_disabled = -1;
-#ifdef CONFIG_SMP
-static int                     power_off;
-#else
-static int                     power_off = 1;
-#endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int                     realmode_power_off = 1;
-#else
-static int                     realmode_power_off;
-#endif
-static int                     exit_kapmd;
-static int                     kapmd_running;
-#ifdef CONFIG_APM_ALLOW_INTS
-static int                     allow_ints = 1;
-#else
-static int                     allow_ints;
-#endif
-static int                     broken_psr;
-
-static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
-static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
-static struct apm_user *       user_list;
-static DEFINE_SPINLOCK(user_list_lock);
-static struct desc_struct      bad_bios_desc = { 0, 0x00409200 };
-
-static char                    driver_version[] = "1.16ac";    /* no spaces */
-
-/*
- *     APM event names taken from the APM 1.2 specification. These are
- *     the message codes that the BIOS uses to tell us about events
- */
-static char *  apm_event_name[] = {
-       "system standby",
-       "system suspend",
-       "normal resume",
-       "critical resume",
-       "low battery",
-       "power status change",
-       "update time",
-       "critical suspend",
-       "user standby",
-       "user suspend",
-       "system standby resume",
-       "capabilities change"
-};
-#define NR_APM_EVENT_NAME      \
-               (sizeof(apm_event_name) / sizeof(apm_event_name[0]))
-
-typedef struct lookup_t {
-       int     key;
-       char *  msg;
-} lookup_t;
-
-/*
- *     The BIOS returns a set of standard error codes in AX when the
- *     carry flag is set.
- */
- 
-static const lookup_t error_table[] = {
-/* N/A { APM_SUCCESS,          "Operation succeeded" }, */
-       { APM_DISABLED,         "Power management disabled" },
-       { APM_CONNECTED,        "Real mode interface already connected" },
-       { APM_NOT_CONNECTED,    "Interface not connected" },
-       { APM_16_CONNECTED,     "16 bit interface already connected" },
-/* N/A { APM_16_UNSUPPORTED,   "16 bit interface not supported" }, */
-       { APM_32_CONNECTED,     "32 bit interface already connected" },
-       { APM_32_UNSUPPORTED,   "32 bit interface not supported" },
-       { APM_BAD_DEVICE,       "Unrecognized device ID" },
-       { APM_BAD_PARAM,        "Parameter out of range" },
-       { APM_NOT_ENGAGED,      "Interface not engaged" },
-       { APM_BAD_FUNCTION,     "Function not supported" },
-       { APM_RESUME_DISABLED,  "Resume timer disabled" },
-       { APM_BAD_STATE,        "Unable to enter requested state" },
-/* N/A { APM_NO_EVENTS,        "No events pending" }, */
-       { APM_NO_ERROR,         "BIOS did not set a return code" },
-       { APM_NOT_PRESENT,      "No APM present" }
-};
-#define ERROR_COUNT    (sizeof(error_table)/sizeof(lookup_t))
-
-/**
- *     apm_error       -       display an APM error
- *     @str: information string
- *     @err: APM BIOS return code
- *
- *     Write a meaningful log entry to the kernel log in the event of
- *     an APM error.
- */
- 
-static void apm_error(char *str, int err)
-{
-       int     i;
-
-       for (i = 0; i < ERROR_COUNT; i++)
-               if (error_table[i].key == err) break;
-       if (i < ERROR_COUNT)
-               printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
-       else
-               printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
-                       str, err);
-}
-
-/*
- * Lock APM functionality to physical CPU 0
- */
- 
-#ifdef CONFIG_SMP
-
-static cpumask_t apm_save_cpus(void)
-{
-       cpumask_t x = current->cpus_allowed;
-       /* Some bioses don't like being called from CPU != 0 */
-       set_cpus_allowed(current, cpumask_of_cpu(0));
-       BUG_ON(smp_processor_id() != 0);
-       return x;
-}
-
-static inline void apm_restore_cpus(cpumask_t mask)
-{
-       set_cpus_allowed(current, mask);
-}
-
-#else
-
-/*
- *     No CPU lockdown needed on a uniprocessor
- */
- 
-#define apm_save_cpus()                (current->cpus_allowed)
-#define apm_restore_cpus(x)    (void)(x)
-
-#endif
-
-/*
- * These are the actual BIOS calls.  Depending on APM_ZERO_SEGS and
- * apm_info.allow_ints, we are being really paranoid here!  Not only
- * are interrupts disabled, but all the segment registers (except SS)
- * are saved and zeroed this means that if the BIOS tries to reference
- * any data without explicitly loading the segment registers, the kernel
- * will fault immediately rather than have some unforeseen circumstances
- * for the rest of the kernel.  And it will be very obvious!  :-) Doing
- * this depends on CS referring to the same physical memory as DS so that
- * DS can be zeroed before the call. Unfortunately, we can't do anything
- * about the stack segment/pointer.  Also, we tell the compiler that
- * everything could change.
- *
- * Also, we KNOW that for the non error case of apm_bios_call, there
- * is no useful data returned in the low order 8 bits of eax.
- */
-#define APM_DO_CLI     \
-       if (apm_info.allow_ints) \
-               local_irq_enable(); \
-       else \
-               local_irq_disable();
-
-#ifdef APM_ZERO_SEGS
-#      define APM_DECL_SEGS \
-               unsigned int saved_fs; unsigned int saved_gs;
-#      define APM_DO_SAVE_SEGS \
-               savesegment(fs, saved_fs); savesegment(gs, saved_gs)
-#      define APM_DO_RESTORE_SEGS \
-               loadsegment(fs, saved_fs); loadsegment(gs, saved_gs)
-#else
-#      define APM_DECL_SEGS
-#      define APM_DO_SAVE_SEGS
-#      define APM_DO_RESTORE_SEGS
-#endif
-
-/**
- *     apm_bios_call   -       Make an APM BIOS 32bit call
- *     @func: APM function to execute
- *     @ebx_in: EBX register for call entry
- *     @ecx_in: ECX register for call entry
- *     @eax: EAX register return
- *     @ebx: EBX register return
- *     @ecx: ECX register return
- *     @edx: EDX register return
- *     @esi: ESI register return
- *
- *     Make an APM call using the 32bit protected mode interface. The
- *     caller is responsible for knowing if APM BIOS is configured and
- *     enabled. This call can disable interrupts for a long period of
- *     time on some laptops.  The return value is in AH and the carry
- *     flag is loaded into AL.  If there is an error, then the error
- *     code is returned in AH (bits 8-15 of eax) and this function
- *     returns non-zero.
- */
- 
-static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
-       u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
-{
-       APM_DECL_SEGS
-       unsigned long           flags;
-       cpumask_t               cpus;
-       int                     cpu;
-       struct desc_struct      save_desc_40;
-
-       cpus = apm_save_cpus();
-       
-       cpu = get_cpu();
-       save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-       get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
-
-       local_save_flags(flags);
-       APM_DO_CLI;
-       APM_DO_SAVE_SEGS;
-       apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
-       APM_DO_RESTORE_SEGS;
-       local_irq_restore(flags);
-       get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
-       put_cpu();
-       apm_restore_cpus(cpus);
-       
-       return *eax & 0xff;
-}
-
-/**
- *     apm_bios_call_simple    -       make a simple APM BIOS 32bit call
- *     @func: APM function to invoke
- *     @ebx_in: EBX register value for BIOS call
- *     @ecx_in: ECX register value for BIOS call
- *     @eax: EAX register on return from the BIOS call
- *
- *     Make a BIOS call that does only returns one value, or just status.
- *     If there is an error, then the error code is returned in AH
- *     (bits 8-15 of eax) and this function returns non-zero. This is
- *     used for simpler BIOS operations. This call may hold interrupts
- *     off for a long time on some laptops.
- */
-
-static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
-{
-       u8                      error;
-       APM_DECL_SEGS
-       unsigned long           flags;
-       cpumask_t               cpus;
-       int                     cpu;
-       struct desc_struct      save_desc_40;
-
-
-       cpus = apm_save_cpus();
-       
-       cpu = get_cpu();
-       save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-       get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
-
-       local_save_flags(flags);
-       APM_DO_CLI;
-       APM_DO_SAVE_SEGS;
-       error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
-       APM_DO_RESTORE_SEGS;
-       local_irq_restore(flags);
-       get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
-       put_cpu();
-       apm_restore_cpus(cpus);
-       return error;
-}
-
-/**
- *     apm_driver_version      -       APM driver version
- *     @val:   loaded with the APM version on return
- *
- *     Retrieve the APM version supported by the BIOS. This is only
- *     supported for APM 1.1 or higher. An error indicates APM 1.0 is
- *     probably present.
- *
- *     On entry val should point to a value indicating the APM driver
- *     version with the high byte being the major and the low byte the
- *     minor number both in BCD
- *
- *     On return it will hold the BIOS revision supported in the
- *     same format.
- */
-
-static int apm_driver_version(u_short *val)
-{
-       u32     eax;
-
-       if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
-               return (eax >> 8) & 0xff;
-       *val = eax;
-       return APM_SUCCESS;
-}
-
-/**
- *     apm_get_event   -       get an APM event from the BIOS
- *     @event: pointer to the event
- *     @info: point to the event information
- *
- *     The APM BIOS provides a polled information for event
- *     reporting. The BIOS expects to be polled at least every second
- *     when events are pending. When a message is found the caller should
- *     poll until no more messages are present.  However, this causes
- *     problems on some laptops where a suspend event notification is
- *     not cleared until it is acknowledged.
- *
- *     Additional information is returned in the info pointer, providing
- *     that APM 1.2 is in use. If no messges are pending the value 0x80
- *     is returned (No power management events pending).
- */
- 
-static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
-{
-       u32     eax;
-       u32     ebx;
-       u32     ecx;
-       u32     dummy;
-
-       if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
-                       &dummy, &dummy))
-               return (eax >> 8) & 0xff;
-       *event = ebx;
-       if (apm_info.connection_version < 0x0102)
-               *info = ~0; /* indicate info not valid */
-       else
-               *info = ecx;
-       return APM_SUCCESS;
-}
-
-/**
- *     set_power_state -       set the power management state
- *     @what: which items to transition
- *     @state: state to transition to
- *
- *     Request an APM change of state for one or more system devices. The
- *     processor state must be transitioned last of all. what holds the
- *     class of device in the upper byte and the device number (0xFF for
- *     all) for the object to be transitioned.
- *
- *     The state holds the state to transition to, which may in fact
- *     be an acceptance of a BIOS requested state change.
- */
- 
-static int set_power_state(u_short what, u_short state)
-{
-       u32     eax;
-
-       if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
-               return (eax >> 8) & 0xff;
-       return APM_SUCCESS;
-}
-
-/**
- *     set_system_power_state - set system wide power state
- *     @state: which state to enter
- *
- *     Transition the entire system into a new APM power state.
- */
- 
-static int set_system_power_state(u_short state)
-{
-       return set_power_state(APM_DEVICE_ALL, state);
-}
-
-/**
- *     apm_do_idle     -       perform power saving
- *
- *     This function notifies the BIOS that the processor is (in the view
- *     of the OS) idle. It returns -1 in the event that the BIOS refuses
- *     to handle the idle request. On a success the function returns 1
- *     if the BIOS did clock slowing or 0 otherwise.
- */
- 
-static int apm_do_idle(void)
-{
-       u32     eax;
-
-       if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
-               static unsigned long t;
-
-               /* This always fails on some SMP boards running UP kernels.
-                * Only report the failure the first 5 times.
-                */
-               if (++t < 5)
-               {
-                       printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
-                                       (eax >> 8) & 0xff);
-                       t = jiffies;
-               }
-               return -1;
-       }
-       clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0;
-       return clock_slowed;
-}
-
-/**
- *     apm_do_busy     -       inform the BIOS the CPU is busy
- *
- *     Request that the BIOS brings the CPU back to full performance. 
- */
- 
-static void apm_do_busy(void)
-{
-       u32     dummy;
-
-       if (clock_slowed || ALWAYS_CALL_BUSY) {
-               (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
-               clock_slowed = 0;
-       }
-}
-
-/*
- * If no process has really been interested in
- * the CPU for some time, we want to call BIOS
- * power management - we probably want
- * to conserve power.
- */
-#define IDLE_CALC_LIMIT   (HZ * 100)
-#define IDLE_LEAKY_MAX    16
-
-static void (*original_pm_idle)(void);
-
-extern void default_idle(void);
-
-/**
- * apm_cpu_idle                -       cpu idling for APM capable Linux
- *
- * This is the idling function the kernel executes when APM is available. It 
- * tries to do BIOS powermanagement based on the average system idle time.
- * Furthermore it calls the system default idle routine.
- *
- * Once more than idle_period jiffies have passed since the last sample,
- * the growth of current->stime over that interval is turned into a
- * percentage of the elapsed jiffies. BIOS idling via apm_do_idle() is
- * used while that percentage exceeds idle_threshold and
- * apm_info.forbid_idle is clear. A gap longer than IDLE_CALC_LIMIT only
- * restarts the sampling with BIOS idling switched off.
- *
- * If any apm_do_idle() call returned 0 or 1, apm_do_busy() is called
- * before this function returns.
- */
-
-static void apm_cpu_idle(void)
-{
-       static int use_apm_idle; /* = 0 */
-       static unsigned int last_jiffies; /* = 0 */
-       static unsigned int last_stime; /* = 0 */
-
-       int apm_idle_done = 0;
-       unsigned int jiffies_since_last_check = jiffies - last_jiffies;
-       unsigned int bucket;
-
-recalc:
-       if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
-               use_apm_idle = 0;
-               last_jiffies = jiffies;
-               last_stime = current->stime;
-       } else if (jiffies_since_last_check > idle_period) {
-               unsigned int idle_percentage;
-
-               idle_percentage = current->stime - last_stime;
-               idle_percentage *= 100;
-               idle_percentage /= jiffies_since_last_check;
-               use_apm_idle = (idle_percentage > idle_threshold);
-               if (apm_info.forbid_idle)
-                       use_apm_idle = 0;
-               last_jiffies = jiffies;
-               last_stime = current->stime;
-       }
-
-       bucket = IDLE_LEAKY_MAX;
-
-       while (!need_resched()) {
-               if (use_apm_idle) {
-                       unsigned int t;
-
-                       t = jiffies;
-                       switch (apm_do_idle()) {
-                       case 0: apm_idle_done = 1;
-                               if (t != jiffies) {  /* jiffies advanced during the BIOS call */
-                                       if (bucket) {
-                                               bucket = IDLE_LEAKY_MAX;
-                                               continue;
-                                       }
-                               } else if (bucket) {  /* no tick seen: use up one bucket unit and retry */
-                                       bucket--;
-                                       continue;
-                               }
-                               break;
-                       case 1: apm_idle_done = 1;
-                               break;
-                       default: /* BIOS refused */
-                               break;
-                       }
-               }
-               if (original_pm_idle)
-                       original_pm_idle();
-               else
-                       default_idle();
-               jiffies_since_last_check = jiffies - last_jiffies;
-               if (jiffies_since_last_check > idle_period)
-                       goto recalc;
-       }
-
-       if (apm_idle_done)
-               apm_do_busy();
-}
-
-/**
- *     apm_power_off   -       ask the BIOS to power off
- *
- *     Handle the power off sequence. This is the one piece of code we
- *     will execute even on SMP machines. In order to deal with BIOS
- *     bugs we support real mode APM BIOS power off calls. We also make
- *     the SMP call on CPU0 as some systems will only honour this call
- *     on their first cpu.
- *
- *     When apm_info.realmode_power_off is set, apm_save_cpus() is called
- *     and the po_bios_call byte sequence below is handed to
- *     machine_real_restart(). Otherwise the power off is requested with
- *     set_system_power_state(APM_STATE_OFF). Results are ignored.
- */
- 
-static void apm_power_off(void)
-{
-       unsigned char   po_bios_call[] = {
-               0xb8, 0x00, 0x10,       /* movw  $0x1000,ax  */
-               0x8e, 0xd0,             /* movw  ax,ss       */
-               0xbc, 0x00, 0xf0,       /* movw  $0xf000,sp  */
-               0xb8, 0x07, 0x53,       /* movw  $0x5307,ax  */
-               0xbb, 0x01, 0x00,       /* movw  $0x0001,bx  */
-               0xb9, 0x03, 0x00,       /* movw  $0x0003,cx  */
-               0xcd, 0x15              /* int   $0x15       */
-       };
-
-       /* Some bioses don't like being called from CPU != 0 */
-       if (apm_info.realmode_power_off)
-       {
-               (void)apm_save_cpus();
-               machine_real_restart(po_bios_call, sizeof(po_bios_call));
-       }
-       else
-               (void) set_system_power_state(APM_STATE_OFF);
-}
-
-#ifdef CONFIG_APM_DO_ENABLE
-
-/**
- *     apm_enable_power_management - enable BIOS APM power management
- *     @enable: enable yes/no
- *
- *     Enable or disable the APM BIOS power services. 
- *
- *     Returns APM_NOT_ENGAGED without calling the BIOS when asked to
- *     disable while apm_info.bios.flags has APM_BIOS_DISENGAGED set.
- *     If apm_bios_call_simple() returns nonzero, bits 8-15 of eax are
- *     returned as the error code. Otherwise APM_BIOS_DISABLED in
- *     apm_info.bios.flags is updated to match @enable and APM_SUCCESS
- *     is returned.
- */
- 
-static int apm_enable_power_management(int enable)
-{
-       u32     eax;
-
-       if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
-               return APM_NOT_ENGAGED;
-       if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
-                       enable, &eax))
-               return (eax >> 8) & 0xff;
-       if (enable)
-               apm_info.bios.flags &= ~APM_BIOS_DISABLED;
-       else
-               apm_info.bios.flags |= APM_BIOS_DISABLED;
-       return APM_SUCCESS;
-}
-#endif
-
-/**
- *     apm_get_power_status    -       get current power state
- *     @status: returned status
- *     @bat: battery info
- *     @life: estimated life
- *
- *     Obtain the current power status from the APM BIOS. We return a
- *     status which gives the rough battery status, and current power
- *     source. The bat value returned gives an estimate as a percentage
- *     of life and a status value for the battery. The estimated life,
- *     if reported, is a lifetime in seconds/minutes at current power
- *     consumption.
- *
- *     @status, @bat and @life receive the low 16 bits of ebx, ecx and
- *     edx from the APM_FUNC_GET_STATUS call. When
- *     apm_info.get_power_status_swabinminutes is set, the edx value is
- *     byte-swapped and bit 15 is forced on before being stored in @life.
- *
- *     Returns APM_32_UNSUPPORTED if apm_info.get_power_status_broken is
- *     set, bits 8-15 of eax if apm_bios_call() returns nonzero (the
- *     outputs are then left untouched), and APM_SUCCESS otherwise.
- */
- 
-static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
-{
-       u32     eax;
-       u32     ebx;
-       u32     ecx;
-       u32     edx;
-       u32     dummy;
-
-       if (apm_info.get_power_status_broken)
-               return APM_32_UNSUPPORTED;
-       if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
-                       &eax, &ebx, &ecx, &edx, &dummy))
-               return (eax >> 8) & 0xff;
-       *status = ebx;
-       *bat = ecx;
-       if (apm_info.get_power_status_swabinminutes) {
-               *life = swab16((u16)edx);
-               *life |= 0x8000;
-       } else
-               *life = edx;
-       return APM_SUCCESS;
-}
-/*
- * apm_get_battery_status - per-battery variant of apm_get_power_status()
- *
- * Compiled out by the "#if 0" below. For connection versions older than
- * 0x0102 only battery 1 is accepted (APM_BAD_DEVICE otherwise), *nbat is
- * set to 1 and the call is forwarded to apm_get_power_status(). For
- * newer connections APM_FUNC_GET_STATUS is issued for device
- * (0x8000 | which) and ebx, ecx, edx and esi are stored in *status,
- * *bat, *life and *nbat. Returns bits 8-15 of eax if apm_bios_call()
- * returns nonzero, APM_SUCCESS otherwise.
- */
-#if 0
-static int apm_get_battery_status(u_short which, u_short *status,
-                                 u_short *bat, u_short *life, u_short *nbat)
-{
-       u32     eax;
-       u32     ebx;
-       u32     ecx;
-       u32     edx;
-       u32     esi;
-
-       if (apm_info.connection_version < 0x0102) {
-               /* pretend we only have one battery. */
-               if (which != 1)
-                       return APM_BAD_DEVICE;
-               *nbat = 1;
-               return apm_get_power_status(status, bat, life);
-       }
-
-       if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
-                       &ebx, &ecx, &edx, &esi))
-               return (eax >> 8) & 0xff;
-       *status = ebx;
-       *bat = ecx;
-       *life = edx;
-       *nbat = esi;
-       return APM_SUCCESS;
-}
-#endif
-
-/**
- *     apm_engage_power_management     -       enable PM on a device
- *     @device: identity of device
- *     @enable: on/off
- *
- *     Activate or deactivate power management on either a specific device
- *     or the entire system (%APM_DEVICE_ALL).
- *
- *     Returns APM_DISABLED without calling the BIOS when asked to
- *     disengage %APM_DEVICE_ALL while APM_BIOS_DISABLED is set in
- *     apm_info.bios.flags. If apm_bios_call_simple() returns nonzero,
- *     bits 8-15 of eax are returned. Otherwise APM_SUCCESS is returned
- *     and, for %APM_DEVICE_ALL only, APM_BIOS_DISENGAGED in
- *     apm_info.bios.flags is updated to match @enable.
- */
- 
-static int apm_engage_power_management(u_short device, int enable)
-{
-       u32     eax;
-
-       if ((enable == 0) && (device == APM_DEVICE_ALL)
-           && (apm_info.bios.flags & APM_BIOS_DISABLED))
-               return APM_DISABLED;
-       if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax))
-               return (eax >> 8) & 0xff;
-       if (device == APM_DEVICE_ALL) {
-               if (enable)
-                       apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
-               else
-                       apm_info.bios.flags |= APM_BIOS_DISENGAGED;
-       }
-       return APM_SUCCESS;
-}
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-
-/**
- *     apm_console_blank       -       blank the display
- *     @blank: on/off
- *
- *     Attempt to blank the console, firstly by blanking just video device
- *     zero, and if that fails (some BIOSes don't support it) then it blanks
- *     all video devices. Typically the BIOS will do laptop backlight and
- *     monitor powerdown for us.
- *
- *     The devices are tried in the order 0x100, 0x1ff, 0x101. A nonzero
- *     @blank requests APM_STATE_STANDBY, zero requests APM_STATE_READY.
- *     If the final error is APM_NOT_ENGAGED, the interface is engaged
- *     once (guarded by the static "tried" counter) and the whole
- *     operation is retried.
- *
- *     Returns 1 on success (APM_SUCCESS or APM_NO_ERROR), 0 on failure
- *     after reporting the error through apm_error().
- */
- 
-static int apm_console_blank(int blank)
-{
-       int     error;
-       u_short state;
-
-       state = blank ? APM_STATE_STANDBY : APM_STATE_READY;
-       /* Blank the first display device */
-       error = set_power_state(0x100, state);
-       if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) {
-               /* try to blank them all instead */
-               error = set_power_state(0x1ff, state);
-               if ((error != APM_SUCCESS) && (error != APM_NO_ERROR))
-                       /* try to blank device one instead */
-                       error = set_power_state(0x101, state);
-       }
-       if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
-               return 1;
-       if (error == APM_NOT_ENGAGED) {
-               static int tried;
-               int eng_error;
-               if (tried++ == 0) {
-                       eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1);
-                       if (eng_error) {
-                               apm_error("set display", error);
-                               apm_error("engage interface", eng_error);
-                               return 0;
-                       } else
-                               return apm_console_blank(blank);
-               }
-       }
-       apm_error("set display", error);
-       return 0;
-}
-#endif
-/*
- * queue_empty - test whether a user has no unread events
- * @as: per-open user state
- *
- * Returns nonzero when the head and tail indices of the event ring of
- * @as are equal, zero otherwise.
- */
-static int queue_empty(struct apm_user *as)
-{
-       return as->event_head == as->event_tail;
-}
-/*
- * get_queued_event - take the oldest event off the ring of a user
- * @as: per-open user state
- *
- * Advances event_tail by one (modulo APM_MAX_EVENTS) and returns the
- * event stored at the new tail position. No emptiness check is made
- * here; do_read() tests queue_empty() before calling.
- */
-static apm_event_t get_queued_event(struct apm_user *as)
-{
-       as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
-       return as->events[as->event_tail];
-}
-/*
- * queue_event - deliver an event to every reading user except the sender
- * @event: event code to queue
- * @sender: user to skip, or NULL to deliver to all readers
- *
- * Walks user_list under user_list_lock. For each user opened for
- * reading (other than @sender) the head index is advanced and @event is
- * stored there. If the head catches up with the tail the ring is full:
- * the tail is advanced as well, dropping the oldest entry, and an error
- * is printed the first time this happens.
- *
- * For users that have both suser and writer set, suspend events
- * increment the per-user and global suspends_pending counters and
- * standby events increment the standbys_pending counters.
- *
- * apm_waitqueue is woken afterwards unless user_list was empty.
- */
-static void queue_event(apm_event_t event, struct apm_user *sender)
-{
-       struct apm_user *       as;
-
-       spin_lock(&user_list_lock);
-       if (user_list == NULL)
-               goto out;
-       for (as = user_list; as != NULL; as = as->next) {
-               if ((as == sender) || (!as->reader))
-                       continue;
-               as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
-               if (as->event_head == as->event_tail) {
-                       static int notified;
-
-                       if (notified++ == 0)
-                           printk(KERN_ERR "apm: an event queue overflowed\n");
-                       as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
-               }
-               as->events[as->event_head] = event;
-               if ((!as->suser) || (!as->writer))
-                       continue;
-               switch (event) {
-               case APM_SYS_SUSPEND:
-               case APM_USER_SUSPEND:
-                       as->suspends_pending++;
-                       suspends_pending++;
-                       break;
-
-               case APM_SYS_STANDBY:
-               case APM_USER_STANDBY:
-                       as->standbys_pending++;
-                       standbys_pending++;
-                       break;
-               }
-       }
-       wake_up_interruptible(&apm_waitqueue);
-out:
-       spin_unlock(&user_list_lock);
-}
-/*
- * set_time - reload the system time from the CMOS clock
- *
- * Does nothing unless got_clock_diff is set (see get_time_diff()).
- * Otherwise xtime.tv_sec becomes get_cmos_time() + clock_cmos_diff and
- * xtime.tv_nsec is zeroed. The callers in this section hold xtime_lock
- * for writing around the call.
- */
-static void set_time(void)
-{
-       if (got_clock_diff) {   /* Must know time zone in order to set clock */
-               xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
-               xtime.tv_nsec = 0; 
-       } 
-}
-/*
- * get_time_diff - record the offset between system time and CMOS time
- *
- * Unless CONFIG_APM_RTC_IS_GMT is defined, stores
- * get_seconds() - get_cmos_time() in clock_cmos_diff and sets
- * got_clock_diff so that set_time() can later restore the clock.
- * With CONFIG_APM_RTC_IS_GMT defined this function is empty.
- */
-static void get_time_diff(void)
-{
-#ifndef CONFIG_APM_RTC_IS_GMT
-       /*
-        * Estimate time zone so that set_time can update the clock
-        */
-       clock_cmos_diff = -get_cmos_time();
-       clock_cmos_diff += get_seconds();
-       got_clock_diff = 1;
-#endif
-}
-/*
- * reinit_timer - reprogram PIT channel 0 after a suspend
- *
- * Only has a body when INIT_TIMER_AFTER_SUSPEND is defined. It then
- * writes mode byte 0x34 to PIT_MODE followed by the low and high bytes
- * of LATCH to PIT_CH0, with a 10 microsecond delay after each write,
- * all under i8253_lock with interrupts saved and disabled.
- *
- * NOTE(review): suspend() calls this while it already holds
- * i8253_lock, so with INIT_TIMER_AFTER_SUSPEND defined the
- * spin_lock_irqsave() below would take the same lock a second time;
- * confirm whether that configuration is ever built.
- */
-static void reinit_timer(void)
-{
-#ifdef INIT_TIMER_AFTER_SUSPEND
-       unsigned long flags;
-
-       spin_lock_irqsave(&i8253_lock, flags);
-       /* set the clock to 100 Hz */
-       outb_p(0x34, PIT_MODE);         /* binary, mode 2, LSB/MSB, ch 0 */
-       udelay(10);
-       outb_p(LATCH & 0xff, PIT_CH0);  /* LSB */
-       udelay(10);
-       outb(LATCH >> 8, PIT_CH0);      /* MSB */
-       udelay(10);
-       spin_unlock_irqrestore(&i8253_lock, flags);
-#endif
-}
-/*
- * suspend - put the system into the APM suspend state and bring it back
- * @vetoable: nonzero if a veto from pm_send_all(PM_SUSPEND) may abort
- *
- * Sequence, as coded below: notify PM clients; suspend and power down
- * devices; record the CMOS/system time offset under xtime_lock and
- * i8253_lock; call set_system_power_state(APM_STATE_SUSPEND) with the
- * locks dropped and interrupts enabled; afterwards reprogram the timer
- * and restore the time under the same locks, power up and resume the
- * devices, notify PM clients and queue APM_NORMAL_RESUME to all users.
- *
- * A veto with @vetoable set returns -EBUSY (after sending
- * APM_STATE_REJECT when connection_version > 0x100) and clears
- * ignore_sys_suspend; with @vetoable clear the veto is only logged.
- * Otherwise returns 0 when the BIOS reported APM_SUCCESS or
- * APM_NO_ERROR and -EIO for any other result.
- *
- * On every path the result is stored in suspend_result of each user,
- * suspend_wait is cleared and apm_suspend_waitqueue is woken.
- */
-static int suspend(int vetoable)
-{
-       int             err;
-       struct apm_user *as;
-
-       if (pm_send_all(PM_SUSPEND, (void *)3)) {
-               /* Vetoed */
-               if (vetoable) {
-                       if (apm_info.connection_version > 0x100)
-                               set_system_power_state(APM_STATE_REJECT);
-                       err = -EBUSY;
-                       ignore_sys_suspend = 0;
-                       printk(KERN_WARNING "apm: suspend was vetoed.\n");
-                       goto out;
-               }
-               printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
-       }
-
-       device_suspend(PMSG_SUSPEND);
-       local_irq_disable();
-       device_power_down(PMSG_SUSPEND);
-
-       /* serialize with the timer interrupt */
-       write_seqlock(&xtime_lock);
-
-       /* protect against access to timer chip registers */
-       spin_lock(&i8253_lock);
-
-       get_time_diff();
-       /*
-        * Irq spinlock must be dropped around set_system_power_state.
-        * We'll undo any timer changes due to interrupts below.
-        */
-       spin_unlock(&i8253_lock);
-       write_sequnlock(&xtime_lock);
-       local_irq_enable();
-
-       save_processor_state();
-       err = set_system_power_state(APM_STATE_SUSPEND);
-       ignore_normal_resume = 1;  /* check_events() tests this flag on APM_NORMAL_RESUME */
-       restore_processor_state();
-
-       local_irq_disable();
-       write_seqlock(&xtime_lock);
-       spin_lock(&i8253_lock);
-       reinit_timer();
-       set_time();
-
-       spin_unlock(&i8253_lock);
-       write_sequnlock(&xtime_lock);
-
-       if (err == APM_NO_ERROR)
-               err = APM_SUCCESS;
-       if (err != APM_SUCCESS)
-               apm_error("suspend", err);
-       err = (err == APM_SUCCESS) ? 0 : -EIO;
-       device_power_up();
-       local_irq_enable();
-       device_resume();
-       pm_send_all(PM_RESUME, (void *)0);
-       queue_event(APM_NORMAL_RESUME, NULL);
- out:
-       spin_lock(&user_list_lock);
-       for (as = user_list; as != NULL; as = as->next) {
-               as->suspend_wait = 0;
-               as->suspend_result = err;
-       }
-       spin_unlock(&user_list_lock);
-       wake_up_interruptible(&apm_suspend_waitqueue);
-       return err;
-}
-/*
- * standby - put the system into the APM standby state
- *
- * Powers devices down with interrupts disabled, records the CMOS/system
- * time offset under xtime_lock, then calls
- * set_system_power_state(APM_STATE_STANDBY) with interrupts enabled.
- * Any result other than APM_SUCCESS or APM_NO_ERROR is reported through
- * apm_error(). Devices are powered up again before returning. Nothing
- * is returned to the caller.
- */
-static void standby(void)
-{
-       int     err;
-
-       local_irq_disable();
-       device_power_down(PMSG_SUSPEND);
-       /* serialize with the timer interrupt */
-       write_seqlock(&xtime_lock);
-       /* If needed, notify drivers here */
-       get_time_diff();
-       write_sequnlock(&xtime_lock);
-       local_irq_enable();
-
-       err = set_system_power_state(APM_STATE_STANDBY);
-       if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
-               apm_error("standby", err);
-
-       local_irq_disable();
-       device_power_up();
-       local_irq_enable();
-}
-/*
- * get_event - fetch one pending event from the APM BIOS
- *
- * Returns the event reported by apm_get_event() when it succeeds and 0
- * otherwise. An error other than APM_NO_EVENTS is reported through
- * apm_error() the first time only (static "notified" counter).
- */
-static apm_event_t get_event(void)
-{
-       int             error;
-       apm_event_t     event;
-       apm_eventinfo_t info;
-
-       static int notified;
-
-       /* we don't use the eventinfo */
-       error = apm_get_event(&event, &info);
-       if (error == APM_SUCCESS)
-               return event;
-
-       if ((error != APM_NO_EVENTS) && (notified++ == 0))
-               apm_error("get_event", error);
-
-       return 0;
-}
-/*
- * check_events - drain and dispatch all pending APM BIOS events
- *
- * Loops on get_event() until it returns 0, handling each event:
- *  - standby requests are queued to users and standby() is run when no
- *    user acknowledgement is pending (standbys_pending <= 0);
- *  - suspend requests are rejected while ignore_bounce is set, dropped
- *    (by returning) while ignore_sys_suspend is set, and otherwise
- *    queued to users, with suspend(1) run when suspends_pending <= 0;
- *  - resume events clear ignore_sys_suspend, start the bounce window
- *    and, except for an APM_NORMAL_RESUME that suspend() flagged via
- *    ignore_normal_resume, restore the time, resume devices and queue
- *    the event;
- *  - capability, low battery and power status changes are queued;
- *  - APM_UPDATE_TIME restores the time from CMOS;
- *  - APM_CRITICAL_SUSPEND runs suspend(0).
- *
- * ignore_bounce is set on every resume event and cleared once more than
- * bounce_interval jiffies have passed since last_resume.
- */
-static void check_events(void)
-{
-       apm_event_t             event;
-       static unsigned long    last_resume;
-       static int              ignore_bounce;
-
-       while ((event = get_event()) != 0) {
-               if (debug) {
-                       if (event <= NR_APM_EVENT_NAME)
-                               printk(KERN_DEBUG "apm: received %s notify\n",
-                                      apm_event_name[event - 1]);
-                       else
-                               printk(KERN_DEBUG "apm: received unknown "
-                                      "event 0x%02x\n", event);
-               }
-               if (ignore_bounce
-                   && ((jiffies - last_resume) > bounce_interval))
-                       ignore_bounce = 0;
-
-               switch (event) {
-               case APM_SYS_STANDBY:
-               case APM_USER_STANDBY:
-                       queue_event(event, NULL);
-                       if (standbys_pending <= 0)
-                               standby();
-                       break;
-
-               case APM_USER_SUSPEND:  /* falls through to APM_SYS_SUSPEND unless CONFIG_APM_IGNORE_USER_SUSPEND is defined */
-#ifdef CONFIG_APM_IGNORE_USER_SUSPEND
-                       if (apm_info.connection_version > 0x100)
-                               set_system_power_state(APM_STATE_REJECT);
-                       break;
-#endif
-               case APM_SYS_SUSPEND:
-                       if (ignore_bounce) {
-                               if (apm_info.connection_version > 0x100)
-                                       set_system_power_state(APM_STATE_REJECT);
-                               break;
-                       }
-                       /*
-                        * If we are already processing a SUSPEND,
-                        * then further SUSPEND events from the BIOS
-                        * will be ignored.  We also return here to
-                        * cope with the fact that the Thinkpads keep
-                        * sending a SUSPEND event until something else
-                        * happens!
-                        */
-                       if (ignore_sys_suspend)
-                               return;
-                       ignore_sys_suspend = 1;
-                       queue_event(event, NULL);
-                       if (suspends_pending <= 0)
-                               (void) suspend(1);
-                       break;
-
-               case APM_NORMAL_RESUME:
-               case APM_CRITICAL_RESUME:
-               case APM_STANDBY_RESUME:
-                       ignore_sys_suspend = 0;
-                       last_resume = jiffies;
-                       ignore_bounce = 1;
-                       if ((event != APM_NORMAL_RESUME)
-                           || (ignore_normal_resume == 0)) {
-                               write_seqlock_irq(&xtime_lock);
-                               set_time();
-                               write_sequnlock_irq(&xtime_lock);
-                               device_resume();
-                               pm_send_all(PM_RESUME, (void *)0);
-                               queue_event(event, NULL);
-                       }
-                       ignore_normal_resume = 0;
-                       break;
-
-               case APM_CAPABILITY_CHANGE:
-               case APM_LOW_BATTERY:
-               case APM_POWER_STATUS_CHANGE:
-                       queue_event(event, NULL);
-                       /* If needed, notify drivers here */
-                       break;
-
-               case APM_UPDATE_TIME:
-                       write_seqlock_irq(&xtime_lock);
-                       set_time();
-                       write_sequnlock_irq(&xtime_lock);
-                       break;
-
-               case APM_CRITICAL_SUSPEND:
-                       /*
-                        * We are not allowed to reject a critical suspend.
-                        */
-                       (void) suspend(0);
-                       break;
-               }
-       }
-}
-/*
- * apm_event_handler - one pass of the kapmd main loop
- *
- * While a standby or suspend is waiting for user acknowledgement
- * (standbys_pending or suspends_pending > 0) and the connection version
- * is above 0x100, APM_STATE_BUSY is sent to the BIOS once the static
- * pending_count has counted down from 4 to zero or below; the counter
- * is then reset to 4. It is also reset whenever nothing is pending.
- * check_events() is called on every pass.
- */
-static void apm_event_handler(void)
-{
-       static int      pending_count = 4;
-       int             err;
-
-       if ((standbys_pending > 0) || (suspends_pending > 0)) {
-               if ((apm_info.connection_version > 0x100) &&
-                               (pending_count-- <= 0)) {
-                       pending_count = 4;
-                       if (debug)
-                               printk(KERN_DEBUG "apm: setting state busy\n");
-                       err = set_system_power_state(APM_STATE_BUSY);
-                       if (err)
-                               apm_error("busy", err);
-               }
-       } else
-               pending_count = 4;
-       check_events();
-}
-
-/*
- * This is the APM thread main loop.
- *
- * The thread puts itself on apm_waitqueue and sleeps in
- * schedule_timeout(APM_CHECK_TIMEOUT), so it runs either when the
- * timeout expires or when the wait queue is woken. Each time it wakes
- * it leaves the loop if exit_kapmd is set, and otherwise calls
- * apm_event_handler(). The wait queue entry is removed before
- * returning.
- */
-
-static void apm_mainloop(void)
-{
-       DECLARE_WAITQUEUE(wait, current);
-
-       add_wait_queue(&apm_waitqueue, &wait);
-       set_current_state(TASK_INTERRUPTIBLE);
-       for (;;) {
-               schedule_timeout(APM_CHECK_TIMEOUT);
-               if (exit_kapmd)
-                       break;
-               /*
-                * Ok, check all events, check for idle (and mark us sleeping
-                * so as not to count towards the load average)..
-                */
-               set_current_state(TASK_INTERRUPTIBLE);
-               apm_event_handler();
-       }
-       remove_wait_queue(&apm_waitqueue, &wait);
-}
-/*
- * check_apm_user - validate the per-open state attached to a file
- * @as: candidate user state (taken from filp->private_data by callers)
- * @func: name of the calling operation, used in the error message
- *
- * Returns 1 and logs an error if @as is NULL or its magic field is not
- * APM_BIOS_MAGIC; returns 0 if @as looks valid.
- */
-static int check_apm_user(struct apm_user *as, const char *func)
-{
-       if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
-               printk(KERN_ERR "apm: %s passed bad filp\n", func);
-               return 1;
-       }
-       return 0;
-}
-/*
- * do_read - read queued APM events
- * @fp: open file; private_data holds the struct apm_user
- * @buf: user buffer receiving apm_event_t values
- * @count: size of @buf in bytes
- * @ppos: not referenced
- *
- * Waits (interruptibly) until the event ring of the caller is not
- * empty, then copies as many whole events as fit into @buf. Each
- * suspend or standby event handed out increments suspends_read or
- * standbys_read so that a later ioctl can acknowledge it.
- *
- * Returns the number of bytes copied, or:
- *   -EIO         private_data failed check_apm_user()
- *   -EINVAL      @count is smaller than one event
- *   -EAGAIN      ring empty and the file is O_NONBLOCK
- *   -EFAULT      copy_to_user() failed before anything was copied
- *   -ERESTARTSYS nothing copied and a signal is pending
- *   0            nothing copied and no signal pending
- */
-static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
-{
-       struct apm_user *       as;
-       int                     i;
-       apm_event_t             event;
-
-       as = fp->private_data;
-       if (check_apm_user(as, "read"))
-               return -EIO;
-       if ((int)count < sizeof(apm_event_t))
-               return -EINVAL;
-       if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK))
-               return -EAGAIN;
-       wait_event_interruptible(apm_waitqueue, !queue_empty(as));
-       i = count;
-       while ((i >= sizeof(event)) && !queue_empty(as)) {
-               event = get_queued_event(as);
-               if (copy_to_user(buf, &event, sizeof(event))) {
-                       if (i < count)  /* something was already copied: report the partial read */
-                               break;
-                       return -EFAULT;
-               }
-               switch (event) {
-               case APM_SYS_SUSPEND:
-               case APM_USER_SUSPEND:
-                       as->suspends_read++;
-                       break;
-
-               case APM_SYS_STANDBY:
-               case APM_USER_STANDBY:
-                       as->standbys_read++;
-                       break;
-               }
-               buf += sizeof(event);
-               i -= sizeof(event);
-       }
-       if (i < count)
-               return count - i;
-       if (signal_pending(current))
-               return -ERESTARTSYS;
-       return 0;
-}
-/*
- * do_poll - poll for readable APM events
- * @fp: open file; private_data holds the struct apm_user
- * @wait: poll table passed on to poll_wait()
- *
- * Registers on apm_waitqueue and returns POLLIN | POLLRDNORM when the
- * event ring of the caller is not empty, 0 otherwise. Also returns 0
- * when private_data fails check_apm_user().
- */
-static unsigned int do_poll(struct file *fp, poll_table * wait)
-{
-       struct apm_user * as;
-
-       as = fp->private_data;
-       if (check_apm_user(as, "poll"))
-               return 0;
-       poll_wait(fp, &apm_waitqueue, wait);
-       if (!queue_empty(as))
-               return POLLIN | POLLRDNORM;
-       return 0;
-}
-/*
- * do_ioctl - handle APM_IOC_STANDBY and APM_IOC_SUSPEND
- * @inode: not referenced
- * @filp: open file; private_data holds the struct apm_user
- * @cmd: ioctl command
- * @arg: not referenced
- *
- * Only users with both suser and writer set may call this (-EPERM
- * otherwise). If the caller has previously read a matching event
- * (standbys_read / suspends_read > 0) the call counts as its
- * acknowledgement and the read, per-user pending and global pending
- * counters are decremented; otherwise a USER_STANDBY / USER_SUSPEND
- * event is queued to the other users.
- *
- * APM_IOC_STANDBY runs standby() once standbys_pending <= 0 and
- * returns 0. APM_IOC_SUSPEND returns suspend(1) once
- * suspends_pending <= 0; otherwise it sleeps until suspend() clears
- * suspend_wait and returns the stored suspend_result.
- *
- * Returns -EIO if private_data fails check_apm_user() and -EINVAL for
- * any other @cmd.
- */
-static int do_ioctl(struct inode * inode, struct file *filp,
-                   u_int cmd, u_long arg)
-{
-       struct apm_user *       as;
-
-       as = filp->private_data;
-       if (check_apm_user(as, "ioctl"))
-               return -EIO;
-       if ((!as->suser) || (!as->writer))
-               return -EPERM;
-       switch (cmd) {
-       case APM_IOC_STANDBY:
-               if (as->standbys_read > 0) {
-                       as->standbys_read--;
-                       as->standbys_pending--;
-                       standbys_pending--;
-               } else
-                       queue_event(APM_USER_STANDBY, as);
-               if (standbys_pending <= 0)
-                       standby();
-               break;
-       case APM_IOC_SUSPEND:
-               if (as->suspends_read > 0) {
-                       as->suspends_read--;
-                       as->suspends_pending--;
-                       suspends_pending--;
-               } else
-                       queue_event(APM_USER_SUSPEND, as);
-               if (suspends_pending <= 0) {
-                       return suspend(1);
-               } else {
-                       as->suspend_wait = 1;
-                       wait_event_interruptible(apm_suspend_waitqueue,
-                                       as->suspend_wait == 0);
-                       return as->suspend_result;
-               }
-               break;
-       default:
-               return -EINVAL;
-       }
-       return 0;
-}
-/*
- * do_release - tear down the per-open state when the device is closed
- * @inode: not referenced
- * @filp: file being released; private_data holds the struct apm_user
- *
- * Any standby or suspend acknowledgements still owed by this user are
- * subtracted from the global pending counters; if that brings a counter
- * to zero or below, the deferred standby() or suspend(1) is run now.
- * The user is then unlinked from user_list under user_list_lock (an
- * error is logged if it is not on the list) and freed.
- *
- * Always returns 0, including when private_data fails
- * check_apm_user().
- */
-static int do_release(struct inode * inode, struct file * filp)
-{
-       struct apm_user *       as;
-
-       as = filp->private_data;
-       if (check_apm_user(as, "release"))
-               return 0;
-       filp->private_data = NULL;
-       if (as->standbys_pending > 0) {
-               standbys_pending -= as->standbys_pending;
-               if (standbys_pending <= 0)
-                       standby();
-       }
-       if (as->suspends_pending > 0) {
-               suspends_pending -= as->suspends_pending;
-               if (suspends_pending <= 0)
-                       (void) suspend(1);
-       }
-       spin_lock(&user_list_lock);
-       if (user_list == as)
-               user_list = as->next;
-       else {
-               struct apm_user *       as1;
-
-               for (as1 = user_list;
-                    (as1 != NULL) && (as1->next != as);
-                    as1 = as1->next)
-                       ;
-               if (as1 == NULL)
-                       printk(KERN_ERR "apm: filp not in user list\n");
-               else
-                       as1->next = as->next;
-       }
-       spin_unlock(&user_list_lock);
-       kfree(as);
-       return 0;
-}
-/*
- * do_open - allocate and register per-open state for the APM device
- * @inode: not referenced
- * @filp: file being opened; private_data is set to the new state
- *
- * Allocates a struct apm_user, sets its magic, empties its event ring,
- * zeroes the pending and read counters, records whether the opener has
- * CAP_SYS_ADMIN and whether the file was opened for reading and/or
- * writing, and links it at the head of user_list under user_list_lock.
- *
- * Returns 0 on success, -ENOMEM if the allocation fails.
- *
- * NOTE(review): suspend_wait and suspend_result are not initialised
- * here; do_ioctl() sets suspend_wait before it waits on it.
- * NOTE(review): the printk below passes sizeof(*as) to "%d"; confirm
- * this is acceptable on every target where this file is built.
- */
-static int do_open(struct inode * inode, struct file * filp)
-{
-       struct apm_user *       as;
-
-       as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL);
-       if (as == NULL) {
-               printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
-                      sizeof(*as));
-               return -ENOMEM;
-       }
-       as->magic = APM_BIOS_MAGIC;
-       as->event_tail = as->event_head = 0;
-       as->suspends_pending = as->standbys_pending = 0;
-       as->suspends_read = as->standbys_read = 0;
-       /*
-        * XXX - this is a tiny bit broken, when we consider BSD
-         * process accounting. If the device is opened by root, we
-        * instantly flag that we used superuser privs. Who knows,
-        * we might close the device immediately without doing a
-        * privileged operation -- cevans
-        */
-       as->suser = capable(CAP_SYS_ADMIN);
-       as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
-       as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;
-       spin_lock(&user_list_lock);
-       as->next = user_list;
-       user_list = as;
-       spin_unlock(&user_list_lock);
-       filp->private_data = as;
-       return 0;
-}
-/*
- * apm_get_info - format the one-line APM status report
- * @buf: output buffer
- * @start: not referenced
- * @fpos: not referenced
- * @length: not referenced
- *
- * The BIOS power status is only queried when exactly one CPU is
- * online; otherwise, or if the query fails, the status fields keep
- * their "unknown" defaults (0xff, -1 and "?"). The battery flag and the
- * remaining time are only decoded for connection versions above 0x100.
- * The fields of the line are described in the comment inside the body.
- *
- * Returns the number of characters written to @buf by sprintf().
- */
-static int apm_get_info(char *buf, char **start, off_t fpos, int length)
-{
-       char *          p;
-       unsigned short  bx;
-       unsigned short  cx;
-       unsigned short  dx;
-       int             error;
-       unsigned short  ac_line_status = 0xff;
-       unsigned short  battery_status = 0xff;
-       unsigned short  battery_flag   = 0xff;
-       int             percentage     = -1;
-       int             time_units     = -1;
-       char            *units         = "?";
-
-       p = buf;
-
-       if ((num_online_cpus() == 1) &&
-           !(error = apm_get_power_status(&bx, &cx, &dx))) {
-               ac_line_status = (bx >> 8) & 0xff;
-               battery_status = bx & 0xff;
-               if ((cx & 0xff) != 0xff)
-                       percentage = cx & 0xff;
-
-               if (apm_info.connection_version > 0x100) {
-                       battery_flag = (cx >> 8) & 0xff;
-                       if (dx != 0xffff) {
-                               units = (dx & 0x8000) ? "min" : "sec";
-                               time_units = dx & 0x7fff;
-                       }
-               }
-       }
-       /* Arguments, with symbols from linux/apm_bios.h.  Information is
-          from the Get Power Status (0x0a) call unless otherwise noted.
-
-          0) Linux driver version (this will change if format changes)
-          1) APM BIOS Version.  Usually 1.0, 1.1 or 1.2.
-          2) APM flags from APM Installation Check (0x00):
-             bit 0: APM_16_BIT_SUPPORT
-             bit 1: APM_32_BIT_SUPPORT
-             bit 2: APM_IDLE_SLOWS_CLOCK
-             bit 3: APM_BIOS_DISABLED
-             bit 4: APM_BIOS_DISENGAGED
-          3) AC line status
-             0x00: Off-line
-             0x01: On-line
-             0x02: On backup power (BIOS >= 1.1 only)
-             0xff: Unknown
-          4) Battery status
-             0x00: High
-             0x01: Low
-             0x02: Critical
-             0x03: Charging
-             0x04: Selected battery not present (BIOS >= 1.2 only)
-             0xff: Unknown
-          5) Battery flag
-             bit 0: High
-             bit 1: Low
-             bit 2: Critical
-             bit 3: Charging
-             bit 7: No system battery
-             0xff: Unknown
-          6) Remaining battery life (percentage of charge):
-             0-100: valid
-             -1: Unknown
-          7) Remaining battery life (time units):
-             Number of remaining minutes or seconds
-             -1: Unknown
-          8) min = minutes; sec = seconds */
-
-       p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
-                    driver_version,
-                    (apm_info.bios.version >> 8) & 0xff,
-                    apm_info.bios.version & 0xff,
-                    apm_info.bios.flags,
-                    ac_line_status,
-                    battery_status,
-                    battery_flag,
-                    percentage,
-                    time_units,
-                    units);
-
-       return p - buf;
-}
-/*
- * apm - body of the kapmd kernel thread
- * @unused: not referenced
- *
- * Sets kapmd_running, daemonizes as "kapmd" and marks itself
- * PF_NOFREEZE; on CONFIG_SMP builds it restricts itself to CPU 0.
- * If no connection version has been chosen yet it is taken from the
- * BIOS version, capped at 0x0102, and registered with
- * apm_driver_version(), falling back to 0x100 if that call fails.
- * Power management is then enabled (CONFIG_APM_DO_ENABLE only) and
- * engaged if the BIOS flags say it is off. With debug set, the power
- * status is logged. apm_power_off is installed as pm_power_off when
- * power_off is set. Finally apm_mainloop() runs when only one CPU is
- * online or smp is set, with the console blank hook installed around
- * it on configurations that have it.
- *
- * Returns 0 after the main loop has exited or been skipped, -1 if
- * enabling or engaging power management failed.
- * NOTE(review): the -1 returns leave kapmd_running set to 1; confirm
- * that code outside this section copes with that.
- */
-static int apm(void *unused)
-{
-       unsigned short  bx;
-       unsigned short  cx;
-       unsigned short  dx;
-       int             error;
-       char *          power_stat;
-       char *          bat_stat;
-
-       kapmd_running = 1;
-
-       daemonize("kapmd");
-
-       current->flags |= PF_NOFREEZE;
-
-#ifdef CONFIG_SMP
-       /* 2002/08/01 - WT
-        * This is to avoid random crashes at boot time during initialization
-        * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
-        * Some bioses don't like being called from CPU != 0.
-        * Method suggested by Ingo Molnar.
-        */
-       set_cpus_allowed(current, cpumask_of_cpu(0));
-       BUG_ON(smp_processor_id() != 0);
-#endif
-
-       if (apm_info.connection_version == 0) {
-               apm_info.connection_version = apm_info.bios.version;
-               if (apm_info.connection_version > 0x100) {
-                       /*
-                        * We only support BIOSs up to version 1.2
-                        */
-                       if (apm_info.connection_version > 0x0102)
-                               apm_info.connection_version = 0x0102;
-                       error = apm_driver_version(&apm_info.connection_version);
-                       if (error != APM_SUCCESS) {
-                               apm_error("driver version", error);
-                               /* Fall back to an APM 1.0 connection. */
-                               apm_info.connection_version = 0x100;
-                       }
-               }
-       }
-
-       if (debug)
-               printk(KERN_INFO "apm: Connection version %d.%d\n",
-                       (apm_info.connection_version >> 8) & 0xff,
-                       apm_info.connection_version & 0xff);
-
-#ifdef CONFIG_APM_DO_ENABLE
-       if (apm_info.bios.flags & APM_BIOS_DISABLED) {
-               /*
-                * This call causes my NEC UltraLite Versa 33/C to hang if it
-                * is booted with PM disabled but not in the docking station.
-                * Unfortunate ...
-                */
-               error = apm_enable_power_management(1);
-               if (error) {
-                       apm_error("enable power management", error);
-                       return -1;
-               }
-       }
-#endif
-
-       if ((apm_info.bios.flags & APM_BIOS_DISENGAGED)
-           && (apm_info.connection_version > 0x0100)) {
-               error = apm_engage_power_management(APM_DEVICE_ALL, 1);
-               if (error) {
-                       apm_error("engage power management", error);
-                       return -1;
-               }
-       }
-
-       if (debug && (num_online_cpus() == 1 || smp )) {
-               error = apm_get_power_status(&bx, &cx, &dx);
-               if (error)
-                       printk(KERN_INFO "apm: power status not available\n");
-               else {
-                       switch ((bx >> 8) & 0xff) {
-                       case 0: power_stat = "off line"; break;
-                       case 1: power_stat = "on line"; break;
-                       case 2: power_stat = "on backup power"; break;
-                       default: power_stat = "unknown"; break;
-                       }
-                       switch (bx & 0xff) {
-                       case 0: bat_stat = "high"; break;
-                       case 1: bat_stat = "low"; break;
-                       case 2: bat_stat = "critical"; break;
-                       case 3: bat_stat = "charging"; break;
-                       default: bat_stat = "unknown"; break;
-                       }
-                       printk(KERN_INFO
-                              "apm: AC %s, battery status %s, battery life ",
-                              power_stat, bat_stat);
-                       if ((cx & 0xff) == 0xff)
-                               printk("unknown\n");
-                       else
-                               printk("%d%%\n", cx & 0xff);
-                       if (apm_info.connection_version > 0x100) {
-                               printk(KERN_INFO
-                                      "apm: battery flag 0x%02x, battery life ",
-                                      (cx >> 8) & 0xff);
-                               if (dx == 0xffff)
-                                       printk("unknown\n");
-                               else
-                                       printk("%d %s\n", dx & 0x7fff,
-                                               (dx & 0x8000) ?
-                                               "minutes" : "seconds");
-                       }
-               }
-       }
-
-       /* Install our power off handler.. */
-       if (power_off)
-               pm_power_off = apm_power_off;
-
-       if (num_online_cpus() == 1 || smp) {
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-               console_blank_hook = apm_console_blank;
-#endif
-               apm_mainloop();
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-               console_blank_hook = NULL;
-#endif
-       }
-       kapmd_running = 0;
-
-       return 0;
-}
-
-#ifndef MODULE
-static int __init apm_setup(char *str)
-{
-       int     invert;
-
-       while ((str != NULL) && (*str != '\0')) {
-               if (strncmp(str, "off", 3) == 0)
-                       apm_disabled = 1;
-               if (strncmp(str, "on", 2) == 0)
-                       apm_disabled = 0;
-               if ((strncmp(str, "bounce-interval=", 16) == 0) ||
-                   (strncmp(str, "bounce_interval=", 16) == 0))
-                       bounce_interval = simple_strtol(str + 16, NULL, 0);
-               if ((strncmp(str, "idle-threshold=", 15) == 0) ||
-                   (strncmp(str, "idle_threshold=", 15) == 0))
-                       idle_threshold = simple_strtol(str + 15, NULL, 0);
-               if ((strncmp(str, "idle-period=", 12) == 0) ||
-                   (strncmp(str, "idle_period=", 12) == 0))
-                       idle_period = simple_strtol(str + 12, NULL, 0);
-               invert = (strncmp(str, "no-", 3) == 0) ||
-                       (strncmp(str, "no_", 3) == 0);
-               if (invert)
-                       str += 3;
-               if (strncmp(str, "debug", 5) == 0)
-                       debug = !invert;
-               if ((strncmp(str, "power-off", 9) == 0) ||
-                   (strncmp(str, "power_off", 9) == 0))
-                       power_off = !invert;
-               if (strncmp(str, "smp", 3) == 0)
-               {
-                       smp = !invert;
-                       idle_threshold = 100;
-               }
-               if ((strncmp(str, "allow-ints", 10) == 0) ||
-                   (strncmp(str, "allow_ints", 10) == 0))
-                       apm_info.allow_ints = !invert;
-               if ((strncmp(str, "broken-psr", 10) == 0) ||
-                   (strncmp(str, "broken_psr", 10) == 0))
-                       apm_info.get_power_status_broken = !invert;
-               if ((strncmp(str, "realmode-power-off", 18) == 0) ||
-                   (strncmp(str, "realmode_power_off", 18) == 0))
-                       apm_info.realmode_power_off = !invert;
-               str = strchr(str, ',');
-               if (str != NULL)
-                       str += strspn(str, ", \t");
-       }
-       return 1;
-}
-
-__setup("apm=", apm_setup);
-#endif
-
-static struct file_operations apm_bios_fops = {
-       .owner          = THIS_MODULE,
-       .read           = do_read,
-       .poll           = do_poll,
-       .ioctl          = do_ioctl,
-       .open           = do_open,
-       .release        = do_release,
-};
-
-static struct miscdevice apm_device = {
-       APM_MINOR_DEV,
-       "apm_bios",
-       &apm_bios_fops
-};
-
-
-/* Simple "print if true" callback */
-static int __init print_if_true(struct dmi_system_id *d)
-{
-       printk("%s\n", d->ident);
-       return 0;
-}
-
-/*
- * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was
- * disabled before the suspend. Linux used to get terribly confused by that.
- */
-static int __init broken_ps2_resume(struct dmi_system_id *d)
-{
-       printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
-       return 0;
-}
-
-/* Some bioses have a broken protected mode poweroff and need to use realmode */
-static int __init set_realmode_power_off(struct dmi_system_id *d)
-{
-       if (apm_info.realmode_power_off == 0) {
-               apm_info.realmode_power_off = 1;
-               printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
-       }
-       return 0;
-}
-
-/* Some laptops require interrupts to be enabled during APM calls */
-static int __init set_apm_ints(struct dmi_system_id *d)
-{
-       if (apm_info.allow_ints == 0) {
-               apm_info.allow_ints = 1;
-               printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
-       }
-       return 0;
-}
-
-/* Some APM bioses corrupt memory or just plain do not work */
-static int __init apm_is_horked(struct dmi_system_id *d)
-{
-       if (apm_info.disabled == 0) {
-               apm_info.disabled = 1;
-               printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
-       }
-       return 0;
-}
-
-static int __init apm_is_horked_d850md(struct dmi_system_id *d)
-{
-       if (apm_info.disabled == 0) {
-               apm_info.disabled = 1;
-               printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
-               printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n");
-               printk(KERN_INFO "download from support.intel.com \n");
-       }
-       return 0;
-}
-
-/* Some APM bioses hang on APM idle calls */
-static int __init apm_likes_to_melt(struct dmi_system_id *d)
-{
-       if (apm_info.forbid_idle == 0) {
-               apm_info.forbid_idle = 1;
-               printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
-       }
-       return 0;
-}
-
-/*
- *  Check for clue free BIOS implementations who use
- *  the following QA technique
- *
- *      [ Write BIOS Code ]<------
- *               |                ^
- *      < Does it Compile >----N--
- *               |Y               ^
- *     < Does it Boot Win98 >-N--
- *               |Y
- *           [Ship It]
- *
- *     Phoenix A04  08/24/2000 is known bad (Dell Inspiron 5000e)
- *     Phoenix A07  09/29/2000 is known good (Dell Inspiron 5000)
- */
-static int __init broken_apm_power(struct dmi_system_id *d)
-{
-       apm_info.get_power_status_broken = 1;
-       printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
-       return 0;
-}
-
-/*
- * This bios swaps the APM minute reporting bytes over (Many sony laptops
- * have this problem).
- */
-static int __init swab_apm_power_in_minutes(struct dmi_system_id *d)
-{
-       apm_info.get_power_status_swabinminutes = 1;
-       printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n");
-       return 0;
-}
-
-static struct dmi_system_id __initdata apm_dmi_table[] = {
-       {
-               print_if_true,
-               KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), },
-       },
-       {       /* Handle problems with APM on the C600 */
-               broken_ps2_resume, "Dell Latitude C600",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), },
-       },
-       {       /* Allow interrupts during suspend on Dell Latitude laptops*/
-               set_apm_ints, "Dell Latitude",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), }
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell Inspiron 2500",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
-       },
-       {       /* Allow interrupts during suspend on Dell Inspiron laptops*/
-               set_apm_ints, "Dell Inspiron", {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), },
-       },
-       {       /* Handle problems with APM on Inspiron 5000e */
-               broken_apm_power, "Dell Inspiron 5000e",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "A04"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), },
-       },
-       {       /* Handle problems with APM on Inspiron 2500 */
-               broken_apm_power, "Dell Inspiron 2500",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "A12"),
-                       DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell Dimension 4100",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
-       },
-       {       /* Allow interrupts during suspend on Compaq Laptops*/
-               set_apm_ints, "Compaq 12XL125",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
-                       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
-       },
-       {       /* Allow interrupts during APM or the clock goes slow */
-               set_apm_ints, "ASUSTeK",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), },
-       },
-       {       /* APM blows on shutdown */
-               apm_is_horked, "ABIT KX7-333[R]",
-               {       DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"),
-                       DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Trigem Delhi3",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Fujitsu-Siemens",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked_d850md, "Intel D850MD",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Intel D810EMO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell XPS-Z",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "A11"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Sharp PC-PJ/AX",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
-       },
-       {       /* APM crashes */
-               apm_is_horked, "Dell Inspiron 2500",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
-                       DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
-       },
-       {       /* APM idle hangs */
-               apm_likes_to_melt, "Jabil AMD",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), },
-       },
-       {       /* APM idle hangs */
-               apm_likes_to_melt, "AMI Bios",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0206H"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-N505VX */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"),
-                       DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-XG29 */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"),
-                       DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z600NE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z600NE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z505LS */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"),
-                       DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z505LS */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-F104K */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"),
-                       DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), },
-       },
-
-       {       /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-C1VE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"),
-                       DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), },
-       },
-       {       /* Handle problems with APM on Sony Vaio PCG-C1VE */
-               swab_apm_power_in_minutes, "Sony VAIO",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-                       DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"),
-                       DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), },
-       },
-       {       /* broken PM poweroff bios */
-               set_realmode_power_off, "Award Software v4.60 PGMA",
-               {       DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."),
-                       DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"),
-                       DMI_MATCH(DMI_BIOS_DATE, "134526184"), },
-       },
-
-       /* Generic per vendor APM settings  */
-
-       {       /* Allow interrupts during suspend on IBM laptops */
-               set_apm_ints, "IBM",
-               {       DMI_MATCH(DMI_SYS_VENDOR, "IBM"), },
-       },
-
-       { }
-};
-
-/*
- * Just start the APM thread. We do NOT want to do APM BIOS
- * calls from anything but the APM thread, if for no other reason
- * than the fact that we don't trust the APM BIOS. This way,
- * most common APM BIOS problems that lead to protection errors
- * etc will have at least some level of being contained...
- *
- * In short, if something bad happens, at least we have a choice
- * of just killing the apm thread..
- */
-static int __init apm_init(void)
-{
-       struct proc_dir_entry *apm_proc;
-       int ret;
-       int i;
-
-       dmi_check_system(apm_dmi_table);
-
-       if (apm_info.bios.version == 0) {
-               printk(KERN_INFO "apm: BIOS not found.\n");
-               return -ENODEV;
-       }
-       printk(KERN_INFO
-               "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
-               ((apm_info.bios.version >> 8) & 0xff),
-               (apm_info.bios.version & 0xff),
-               apm_info.bios.flags,
-               driver_version);
-       if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
-               printk(KERN_INFO "apm: no 32 bit BIOS support\n");
-               return -ENODEV;
-       }
-
-       if (allow_ints)
-               apm_info.allow_ints = 1;
-       if (broken_psr)
-               apm_info.get_power_status_broken = 1;
-       if (realmode_power_off)
-               apm_info.realmode_power_off = 1;
-       /* User can override, but default is to trust DMI */
-       if (apm_disabled != -1)
-               apm_info.disabled = apm_disabled;
-
-       /*
-        * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1
-        * but is reportedly a 1.0 BIOS.
-        */
-       if (apm_info.bios.version == 0x001)
-               apm_info.bios.version = 0x100;
-
-       /* BIOS < 1.2 doesn't set cseg_16_len */
-       if (apm_info.bios.version < 0x102)
-               apm_info.bios.cseg_16_len = 0; /* 64k */
-
-       if (debug) {
-               printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x",
-                       apm_info.bios.cseg, apm_info.bios.offset,
-                       apm_info.bios.cseg_16, apm_info.bios.dseg);
-               if (apm_info.bios.version > 0x100)
-                       printk(" cseg len %x, dseg len %x",
-                               apm_info.bios.cseg_len,
-                               apm_info.bios.dseg_len);
-               if (apm_info.bios.version > 0x101)
-                       printk(" cseg16 len %x", apm_info.bios.cseg_16_len);
-               printk("\n");
-       }
-
-       if (apm_info.disabled) {
-               printk(KERN_NOTICE "apm: disabled on user request.\n");
-               return -ENODEV;
-       }
-       if ((num_online_cpus() > 1) && !power_off && !smp) {
-               printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
-               apm_info.disabled = 1;
-               return -ENODEV;
-       }
-       if (PM_IS_ACTIVE()) {
-               printk(KERN_NOTICE "apm: overridden by ACPI.\n");
-               apm_info.disabled = 1;
-               return -ENODEV;
-       }
-       pm_active = 1;
-
-       /*
-        * Set up a segment that references the real mode segment 0x40
-        * that extends up to the end of page zero (that we have reserved).
-        * This is for buggy BIOS's that refer to (real mode) segment 0x40
-        * even though they are called in protected mode.
-        */
-       set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
-       _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
-
-       apm_bios_entry.offset = apm_info.bios.offset;
-       apm_bios_entry.segment = APM_CS;
-
-       for (i = 0; i < NR_CPUS; i++) {
-               set_base(get_cpu_gdt_table(i)[APM_CS >> 3],
-                        __va((unsigned long)apm_info.bios.cseg << 4));
-               set_base(get_cpu_gdt_table(i)[APM_CS_16 >> 3],
-                        __va((unsigned long)apm_info.bios.cseg_16 << 4));
-               set_base(get_cpu_gdt_table(i)[APM_DS >> 3],
-                        __va((unsigned long)apm_info.bios.dseg << 4));
-#ifndef APM_RELAX_SEGMENTS
-               if (apm_info.bios.version == 0x100) {
-#endif
-                       /* For ASUS motherboard, Award BIOS rev 110 (and others?) */
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3], 64 * 1024 - 1);
-                       /* For some unknown machine. */
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS_16 >> 3], 64 * 1024 - 1);
-                       /* For the DEC Hinote Ultra CT475 (and others?) */
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_DS >> 3], 64 * 1024 - 1);
-#ifndef APM_RELAX_SEGMENTS
-               } else {
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3],
-                               (apm_info.bios.cseg_len - 1) & 0xffff);
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS_16 >> 3],
-                               (apm_info.bios.cseg_16_len - 1) & 0xffff);
-                       _set_limit((char *)&get_cpu_gdt_table(i)[APM_DS >> 3],
-                               (apm_info.bios.dseg_len - 1) & 0xffff);
-                     /* workaround for broken BIOSes */
-                       if (apm_info.bios.cseg_len <= apm_info.bios.offset)
-                               _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3], 64 * 1024 -1);
-                       if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */
-                               /* for the BIOS that assumes granularity = 1 */
-                               get_cpu_gdt_table(i)[APM_DS >> 3].b |= 0x800000;
-                               printk(KERN_NOTICE "apm: we set the granularity of dseg.\n");
-                       }
-               }
-#endif
-       }
-
-       apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
-       if (apm_proc)
-               apm_proc->owner = THIS_MODULE;
-
-       ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
-       if (ret < 0) {
-               printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
-               return -ENOMEM;
-       }
-
-       if (num_online_cpus() > 1 && !smp ) {
-               printk(KERN_NOTICE
-                  "apm: disabled - APM is not SMP safe (power off active).\n");
-               return 0;
-       }
-
-       misc_register(&apm_device);
-
-       if (HZ != 100)
-               idle_period = (idle_period * HZ) / 100;
-       if (idle_threshold < 100) {
-               original_pm_idle = pm_idle;
-               pm_idle  = apm_cpu_idle;
-               set_pm_idle = 1;
-       }
-
-       return 0;
-}
-
-static void __exit apm_exit(void)
-{
-       int     error;
-
-       if (set_pm_idle) {
-               pm_idle = original_pm_idle;
-               /*
-                * We are about to unload the current idle thread pm callback
-                * (pm_idle), Wait for all processors to update cached/local
-                * copies of pm_idle before proceeding.
-                */
-               cpu_idle_wait();
-       }
-       if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
-           && (apm_info.connection_version > 0x0100)) {
-               error = apm_engage_power_management(APM_DEVICE_ALL, 0);
-               if (error)
-                       apm_error("disengage power management", error);
-       }
-       misc_deregister(&apm_device);
-       remove_proc_entry("apm", NULL);
-       if (power_off)
-               pm_power_off = NULL;
-       exit_kapmd = 1;
-       while (kapmd_running)
-               schedule();
-       pm_active = 0;
-}
-
-module_init(apm_init);
-module_exit(apm_exit);
-
-MODULE_AUTHOR("Stephen Rothwell");
-MODULE_DESCRIPTION("Advanced Power Management");
-MODULE_LICENSE("GPL");
-module_param(debug, bool, 0644);
-MODULE_PARM_DESC(debug, "Enable debug mode");
-module_param(power_off, bool, 0444);
-MODULE_PARM_DESC(power_off, "Enable power off");
-module_param(bounce_interval, int, 0444);
-MODULE_PARM_DESC(bounce_interval,
-               "Set the number of ticks to ignore suspend bounces");
-module_param(allow_ints, bool, 0444);
-MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls");
-module_param(broken_psr, bool, 0444);
-MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call");
-module_param(realmode_power_off, bool, 0444);
-MODULE_PARM_DESC(realmode_power_off,
-               "Switch to real mode before powering off");
-module_param(idle_threshold, int, 0444);
-MODULE_PARM_DESC(idle_threshold,
-       "System idle percentage above which to make APM BIOS idle calls");
-module_param(idle_period, int, 0444);
-MODULE_PARM_DESC(idle_period,
-       "Period (in sec/100) over which to caculate the idle percentage");
-module_param(smp, bool, 0444);
-MODULE_PARM_DESC(smp,
-       "Set this to enable APM use on an SMP platform. Use with caution on older systems");
-MODULE_ALIAS_MISCDEV(APM_MINOR_DEV);
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c

index c501f4dd71d2f1e13d4822b2f8e9df187a6817d1..047e7953d7d093861707db7cd603bf391e8a111f 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c
@@ -33,8 +33,6 @@ static int disable_x86_serial_nr __devinitdata = 1;
  
  struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
  
-extern void mcheck_init(struct cpuinfo_x86 *c);
-
  extern void machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c);
  
  extern int disable_pse;
@@ -238,10 +236,10 @@ static void __init early_cpu_detect(void)
                 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                 c->x86 = (tfms >> 8) & 15;
                 c->x86_model = (tfms >> 4) & 15;
-               if (c->x86 == 0xf) {
+               if (c->x86 == 0xf)
                         c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
                         c->x86_model += ((tfms >> 16) & 0xF) << 4;
-               }
                 c->x86_mask = tfms & 15;
                 if (cap0 & (1<<19))
                         c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
@@ -340,7 +338,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
         c->x86_model = c->x86_mask = 0; /* So far unknown... */
         c->x86_vendor_id[0] = '\0'; /* Unset */
         c->x86_model_id[0] = '\0';  /* Unset */
-       c->x86_num_cores = 1;
+       c->x86_max_cores = 1;
         memset(&c->x86_capability, 0, sizeof c->x86_capability);
  
         if (!have_cpuid_p()) {
@@ -436,9 +434,8 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
         }
  
         /* Init Machine Check Exception if available. */
-#ifdef CONFIG_X86_MCE
         mcheck_init(c);
-#endif
+
         if (c == &boot_cpu_data)
                 sysenter_setup();
         enable_sep_cpu();
@@ -453,52 +450,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
  void __devinit detect_ht(struct cpuinfo_x86 *c)
  {
         u32     eax, ebx, ecx, edx;
-       int     index_msb, tmp;
+       int     index_msb, core_bits;
         int     cpu = smp_processor_id();
  
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+
+       c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+
         if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                 return;
  
-       cpuid(1, &eax, &ebx, &ecx, &edx);
         smp_num_siblings = (ebx & 0xff0000) >> 16;
  
         if (smp_num_siblings == 1) {
                 printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
         } else if (smp_num_siblings > 1 ) {
-               index_msb = 31;
  
                 if (smp_num_siblings > NR_CPUS) {
                         printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
                         smp_num_siblings = 1;
                         return;
                 }
-               tmp = smp_num_siblings;
-               while ((tmp & 0x80000000 ) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+
+               index_msb = get_count_order(smp_num_siblings);
                 phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
  
                 printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                        phys_proc_id[cpu]);
  
-               smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
  
-               tmp = smp_num_siblings;
-               index_msb = 31;
-               while ((tmp & 0x80000000) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
+               index_msb = get_count_order(smp_num_siblings) ;
  
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+               core_bits = get_count_order(c->x86_max_cores);
  
-               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+                                              ((1 << core_bits) - 1);
  
-               if (c->x86_num_cores > 1)
+               if (c->x86_max_cores > 1)
                         printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
                                cpu_core_id[cpu]);
         }
@@ -615,12 +604,6 @@ void __cpuinit cpu_init(void)
                 set_in_cr4(X86_CR4_TSD);
         }
  
-       /*
-        * Set up the per-thread TLS descriptor cache:
-        */
-       memcpy(thread->tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
-              GDT_ENTRY_TLS_ENTRIES * 8);
-
         cpu_gdt_init(&cpu_gdt_descr[cpu]);
  
         /*
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S

index e0cdcd3fbeeee11c3950604e233254f82c447f1f..2d6807942971262aa2570c46679aa3e4e8a2182f 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
@@ -746,11 +746,10 @@ nmi_stack_fixup:
  nmi_debug_stack_check:
         cmpw $__KERNEL_CS,16(%esp)
         jne nmi_stack_correct
-       cmpl $debug - 1,(%esp)
-       jle nmi_stack_correct
+       cmpl $debug,(%esp)
+       jb nmi_stack_correct
         cmpl $debug_esp_fix_insn,(%esp)
-       jle nmi_debug_stack_fixup
-nmi_debug_stack_fixup:
+       ja nmi_stack_correct
         FIX_STACK(24,nmi_stack_correct, 1)
         jmp nmi_stack_correct
  
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c

index 63a35ef115b46be08988bba953f8b1a6e203361b..20c2ae1d2b8d6ccd28e48c849bcc64dddad3da39 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c
@@ -88,6 +88,9 @@ static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsign
  int (*ioapic_renumber_irq)(int ioapic, int irq);
  atomic_t irq_mis_count;
  
+/* Where, if anywhere, is the i8259 connected in ExtINT mode? */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
  static DEFINE_SPINLOCK(ioapic_lock);
  
  /*
@@ -784,10 +787,11 @@ static int find_irq_entry(int apic, int pin, int type)
  }
  
  #ifndef CONFIG_XEN
+#endif
  /*
   * Find the pin to which IRQ[irq] (ISA) is connected
   */
-static int find_isa_irq_pin(int irq, int type)
+static int __init find_isa_irq_pin(int irq, int type)
  {
         int i;
  
@@ -806,7 +810,33 @@ static int find_isa_irq_pin(int irq, int type)
         }
         return -1;
  }
-#endif
+
+/* Find the index of the IO-APIC to which IRQ[irq] (ISA) is routed, or -1 */
+static int __init find_isa_irq_apic(int irq, int type)
+{
+       int idx, apic;
+
+       /* Locate the first MP interrupt entry for this ISA-class bus IRQ. */
+       for (idx = 0; idx < mp_irq_entries; idx++) {
+               int bus_type = mp_bus_id_to_type[mp_irqs[idx].mpc_srcbus];
+
+               if (bus_type != MP_BUS_ISA && bus_type != MP_BUS_EISA &&
+                   bus_type != MP_BUS_MCA && bus_type != MP_BUS_NEC98)
+                       continue;
+               if (mp_irqs[idx].mpc_irqtype != type ||
+                   mp_irqs[idx].mpc_srcbusirq != irq)
+                       continue;
+
+               /* Translate the entry's destination APIC id to an index. */
+               for (apic = 0; apic < nr_ioapics; apic++) {
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[idx].mpc_dstapic)
+                               return apic;
+               }
+               return -1;
+       }
+
+       return -1;
+}
  
  /*
   * Find a specific PCI IRQ entry.
@@ -1306,7 +1336,7 @@ static void __init setup_IO_APIC_irqs(void)
   * Set up the 8259A-master output pin:
   */
  #ifndef CONFIG_XEN
-static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
  {
         struct IO_APIC_route_entry entry;
         unsigned long flags;
@@ -1340,8 +1370,8 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
          * Add it to the IO-APIC irq-routing table:
          */
         spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
-       io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+       io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
         spin_unlock_irqrestore(&ioapic_lock, flags);
  
         enable_8259A_irq(0);
@@ -1647,7 +1677,8 @@ void __init print_IO_APIC(void) { }
  static void __init enable_IO_APIC(void)
  {
         union IO_APIC_reg_01 reg_01;
-       int i;
+       int i8259_apic, i8259_pin;
+       int i, apic;
         unsigned long flags;
  
         for (i = 0; i < PIN_MAP_SIZE; i++) {
@@ -1661,11 +1692,52 @@ static void __init enable_IO_APIC(void)
         /*
          * The number of IO-APIC IRQ registers (== #pins):
          */
-       for (i = 0; i < nr_ioapics; i++) {
+       for (apic = 0; apic < nr_ioapics; apic++) {
                 spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(i, 1);
+               reg_01.raw = io_apic_read(apic, 1);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
-               nr_ioapic_registers[i] = reg_01.bits.entries+1;
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+       for(apic = 0; apic < nr_ioapics; apic++) {
+               int pin;
+               /* See if any pin of this IO-APIC is in ExtINT mode */
+               for(pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+                       spin_lock_irqsave(&ioapic_lock, flags);
+                       *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+                       *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+                       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+                       /*
+                        * If the interrupt line is enabled and in ExtINT mode,
+                        * this is the pin where the i8259 is connected.
+                        */
+                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+                               ioapic_i8259.apic = apic;
+                               ioapic_i8259.pin  = pin;
+                               goto found_i8259;
+                       }
+               }
+       }
+ found_i8259:
+       /* Look to see whether the MP table has reported the ExtINT pin. */
+       /* If we could not find the pin by looking at the IO-APICs, the
+        * i8259 is probably not connected to an IO-APIC, but give the
+        * MP table a chance anyway.
+        */
+       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+       /* Trust the MP table if nothing is setup in the hardware */
+       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+               ioapic_i8259.pin  = i8259_pin;
+               ioapic_i8259.apic = i8259_apic;
+       }
+       /* Complain if the MP table and the hardware disagree */
+       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+       {
+               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
         }
  
         /*
@@ -1679,9 +1751,6 @@ static void __init enable_IO_APIC(void)
   */
  void disable_IO_APIC(void)
  {
-#ifndef CONFIG_XEN
-       int pin;
-#endif
         /*
          * Clear the IO-APIC before rebooting:
          */
@@ -1693,8 +1762,7 @@ void disable_IO_APIC(void)
          * Put that IOAPIC in virtual wire mode
          * so legacy interrupts can be delivered.
          */
-       pin = find_isa_irq_pin(0, mp_ExtINT);
-       if (pin != -1) {
+       if (ioapic_i8259.pin != -1) {
                 struct IO_APIC_route_entry entry;
                 unsigned long flags;
  
@@ -1705,7 +1773,7 @@ void disable_IO_APIC(void)
                 entry.polarity        = 0; /* High */
                 entry.delivery_status = 0;
                 entry.dest_mode       = 0; /* Physical */
-               entry.delivery_mode   = 7; /* ExtInt */
+               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                 entry.vector          = 0;
                 entry.dest.physical.physical_dest = 0;
  
@@ -1714,11 +1782,13 @@ void disable_IO_APIC(void)
                  * Add it to the IO-APIC irq-routing table:
                  */
                 spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
-               io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+               io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+1));
+               io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+0));
                 spin_unlock_irqrestore(&ioapic_lock, flags);
         }
-       disconnect_bsp_APIC(pin != -1);
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
  #endif
  }
  
@@ -1994,7 +2064,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
  {
         int irq = vector_to_irq(vector);
  
-       move_irq(vector);
+       move_native_irq(vector);
         ack_edge_ioapic_irq(irq);
  }
  
@@ -2009,7 +2079,7 @@ static void end_level_ioapic_vector (unsigned int vector)
  {
         int irq = vector_to_irq(vector);
  
-       move_irq(vector);
+       move_native_irq(vector);
         end_level_ioapic_irq(irq);
  }
  
@@ -2174,20 +2244,21 @@ static void setup_nmi (void)
   */
  static inline void unlock_ExtINT_logic(void)
  {
-       int pin, i;
+       int apic, pin, i;
         struct IO_APIC_route_entry entry0, entry1;
         unsigned char save_control, save_freq_select;
         unsigned long flags;
  
-       pin = find_isa_irq_pin(8, mp_INT);
+       pin  = find_isa_irq_pin(8, mp_INT);
+       apic = find_isa_irq_apic(8, mp_INT);
         if (pin == -1)
                 return;
  
         spin_lock_irqsave(&ioapic_lock, flags);
-       *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
-       *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+       *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+       *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
         spin_unlock_irqrestore(&ioapic_lock, flags);
-       clear_IO_APIC_pin(0, pin);
+       clear_IO_APIC_pin(apic, pin);
  
         memset(&entry1, 0, sizeof(entry1));
  
@@ -2200,8 +2271,8 @@ static inline void unlock_ExtINT_logic(void)
         entry1.vector = 0;
  
         spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
         spin_unlock_irqrestore(&ioapic_lock, flags);
  
         save_control = CMOS_READ(RTC_CONTROL);
@@ -2219,11 +2290,11 @@ static inline void unlock_ExtINT_logic(void)
  
         CMOS_WRITE(save_control, RTC_CONTROL);
         CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       clear_IO_APIC_pin(0, pin);
+       clear_IO_APIC_pin(apic, pin);
  
         spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
         spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
@@ -2235,7 +2306,7 @@ static inline void unlock_ExtINT_logic(void)
   */
  static inline void check_timer(void)
  {
-       int pin1, pin2;
+       int apic1, pin1, apic2, pin2;
         int vector;
  
         /*
@@ -2257,10 +2328,13 @@ static inline void check_timer(void)
         timer_ack = 1;
         enable_8259A_irq(0);
  
-       pin1 = find_isa_irq_pin(0, mp_INT);
-       pin2 = find_isa_irq_pin(0, mp_ExtINT);
+       pin1  = find_isa_irq_pin(0, mp_INT);
+       apic1 = find_isa_irq_apic(0, mp_INT);
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
  
-       printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+       printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
  
         if (pin1 != -1) {
                 /*
@@ -2277,8 +2351,9 @@ static inline void check_timer(void)
                                 clear_IO_APIC_pin(0, pin1);
                         return;
                 }
-               clear_IO_APIC_pin(0, pin1);
-               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+               clear_IO_APIC_pin(apic1, pin1);
+               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
+                               "IO-APIC\n");
         }
  
         printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
@@ -2287,13 +2362,13 @@ static inline void check_timer(void)
                 /*
                  * legacy devices should be connected to IO APIC #0
                  */
-               setup_ExtINT_IRQ0_pin(pin2, vector);
+               setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
                 if (timer_irq_works()) {
                         printk("works.\n");
                         if (pin1 != -1)
-                               replace_pin_at_irq(0, 0, pin1, 0, pin2);
+                               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
                         else
-                               add_pin_to_irq(0, 0, pin2);
+                               add_pin_to_irq(0, apic2, pin2);
                         if (nmi_watchdog == NMI_IO_APIC) {
                                 setup_nmi();
                         }
@@ -2302,7 +2377,7 @@ static inline void check_timer(void)
                 /*
                  * Cleanup, just in case ...
                  */
-               clear_IO_APIC_pin(0, pin2);
+               clear_IO_APIC_pin(apic2, pin2);
         }
         printk(" failed.\n");
  
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c

index c7ecad9078bdaa664782d823f9d14a96318c73f6..d202ff4988ff032a0e1ecee81b59c4e0c8c78d06 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c
@@ -218,7 +218,7 @@ int show_interrupts(struct seq_file *p, void *v)
  
         if (i == 0) {
                 seq_printf(p, "           ");
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                         seq_printf(p, "CPU%d       ",j);
                 seq_putc(p, '\n');
         }
@@ -232,7 +232,7 @@ int show_interrupts(struct seq_file *p, void *v)
  #ifndef CONFIG_SMP
                 seq_printf(p, "%10u ", kstat_irqs(i));
  #else
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                         seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
  #endif
                 seq_printf(p, " %14s", irq_desc[i].handler->typename);
@@ -246,12 +246,12 @@ skip:
                 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
         } else if (i == NR_IRQS) {
                 seq_printf(p, "NMI: ");
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                         seq_printf(p, "%10u ", nmi_count(j));
                 seq_putc(p, '\n');
  #ifdef CONFIG_X86_LOCAL_APIC
                 seq_printf(p, "LOC: ");
-               for_each_cpu(j)
+               for_each_online_cpu(j)
                         seq_printf(p, "%10u ",
                                 per_cpu(irq_stat,j).apic_timer_irqs);
                 seq_putc(p, '\n');
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c

index be63e1282642cef6ee7857b33c62ebf72155b469..06970d951759692de2cd7ea7c3a1f99f532aea0b 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c
@@ -18,6 +18,7 @@
  #include <asm/system.h>
  #include <asm/ldt.h>
  #include <asm/desc.h>
+#include <asm/mmu_context.h>
  
  #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
  static void flush_ldt(void *null)
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c

index 99d85315425c8ec241cb9992de067936a5274e73..737d646a590557259855c1554be3572a8f3c182e 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c
@@ -69,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
  /* Processor that is doing the boot up */
  unsigned int boot_cpu_physical_apicid = -1U;
  /* Internal processor count */
-static unsigned int __initdata num_processors;
+static unsigned int __devinitdata num_processors;
  
  /* Bitmask of physically existing CPUs */
  physid_mask_t phys_cpu_present_map;
@@ -120,7 +120,7 @@ static int MP_valid_apicid(int apicid, int version)
  #endif
  
  #ifndef CONFIG_XEN
-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __devinit MP_processor_info (struct mpc_config_processor *m)
  {
         int ver, apicid;
         physid_mask_t phys_cpu;
@@ -183,17 +183,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
                 boot_cpu_physical_apicid = m->mpc_apicid;
         }
  
-       if (num_processors >= NR_CPUS) {
-               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-                       "  Processor ignored.\n", NR_CPUS); 
-               return;
-       }
-
-       if (num_processors >= maxcpus) {
-               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
-                       " Processor ignored.\n", maxcpus); 
-               return;
-       }
         ver = m->mpc_apicver;
  
         if (!MP_valid_apicid(apicid, ver)) {
@@ -202,11 +191,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
                 return;
         }
  
-       cpu_set(num_processors, cpu_possible_map);
-       num_processors++;
-       phys_cpu = apicid_to_cpu_present(apicid);
-       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
-
         /*
          * Validate version
          */
@@ -217,9 +201,29 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
                 ver = 0x10;
         }
         apic_version[m->mpc_apicid] = ver;
+
+       phys_cpu = apicid_to_cpu_present(apicid);
+       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                       "  Processor ignored.\n", NR_CPUS);
+               return;
+       }
+
+       if (num_processors >= maxcpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+                       " Processor ignored.\n", maxcpus);
+               return;
+       }
+
+       cpu_set(num_processors, cpu_possible_map);
+       num_processors++;
+
         if ((num_processors > 8) &&
-           APIC_XAPIC(ver) &&
-           (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
+           ((APIC_XAPIC(ver) &&
+            (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) ||
+            (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)))
                 def_to_bigsmp = 1;
         else
                 def_to_bigsmp = 0;
@@ -850,7 +854,7 @@ void __init mp_register_lapic_address (
  }
  
  
-void __init mp_register_lapic (
+void __devinit mp_register_lapic (
         u8                      id, 
         u8                      enabled)
  {
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c

index ad2d21710e8b3d074741ea16f85cdb3878a13a0a..0653e95fa052125c86bec9a60215a67335f39194 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
@@ -108,16 +108,32 @@ void xen_idle(void)
         if (need_resched()) {
                 local_irq_enable();
         } else {
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb__after_clear_bit();
                 stop_hz_timer();
                 /* Blocking includes an implicit local_irq_enable(). */
                 HYPERVISOR_sched_op(SCHEDOP_block, 0);
                 start_hz_timer();
+               set_thread_flag(TIF_POLLING_NRFLAG);
         }
  }
  #ifdef CONFIG_APM_MODULE
  EXPORT_SYMBOL(default_idle);
  #endif
  
+/*
+ * Called from the idle loop when this CPU has been marked offline.
+ */
+static inline void play_dead(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+       /* Issue VCPUOP_down for the current processor, then re-enable irqs. */
+       HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+       local_irq_enable();
+#else
+       BUG();
+#endif /* CONFIG_HOTPLUG_CPU */
+}
+
  /*
   * The idle thread. There's no useful work to be
   * done, so just try to conserve power and have a
@@ -126,9 +142,9 @@ EXPORT_SYMBOL(default_idle);
   */
  void cpu_idle(void)
  {
-#if defined(CONFIG_HOTPLUG_CPU)
-       int cpu = raw_smp_processor_id();
-#endif
+       int cpu = smp_processor_id();
+
+       set_thread_flag(TIF_POLLING_NRFLAG);
  
         /* endless idle loop with no priority at all */
         while (1) {
@@ -139,17 +155,15 @@ void cpu_idle(void)
  
                         rmb();
  
-#if defined(CONFIG_HOTPLUG_CPU)
-                       if (cpu_is_offline(cpu)) {
-                               HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
-                               local_irq_enable();
-                       }
-#endif
+                       if (cpu_is_offline(cpu))
+                               play_dead();
  
                         __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                         xen_idle();
                 }
+               preempt_enable_no_resched();
                 schedule();
+               preempt_disable();
         }
  }
  
@@ -187,6 +201,8 @@ void __devinit select_idle_routine(const struct cpuinfo_x86 *c) {}
  
  void show_regs(struct pt_regs * regs)
  {
+       unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+
         printk("\n");
         printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
         printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
@@ -203,6 +219,13 @@ void show_regs(struct pt_regs * regs)
         printk(" DS: %04x ES: %04x\n",
                 0xffff & regs->xds,0xffff & regs->xes);
  
+       cr0 = read_cr0();
+       cr2 = read_cr2();
+       cr3 = read_cr3();
+       if (current_cpu_data.x86 > 4) {
+               cr4 = read_cr4();
+       }
+       printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
         show_trace(NULL, &regs->esp);
  }
  
@@ -275,13 +298,6 @@ void flush_thread(void)
  {
         struct task_struct *tsk = current;
  
-       /*
-        * Remove function-return probe instances associated with this task
-        * and put them back on the free list. Do not insert an exit probe for
-        * this function, it will be disabled by kprobe_flush_task if you do.
-        */
-       kprobe_flush_task(tsk);
-
         memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
         memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));        
         /*
@@ -445,7 +461,9 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
         struct pt_regs ptregs;
         
         ptregs = *(struct pt_regs *)
-               ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
+               ((unsigned long)tsk->thread_info +
+               /* see comments in copy_thread() about -8 */
+               THREAD_SIZE - sizeof(ptregs) - 8);
         ptregs.xcs &= 0xffff;
         ptregs.xds &= 0xffff;
         ptregs.xes &= 0xffff;
@@ -453,7 +471,6 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
  
         elf_core_copy_regs(regs, &ptregs);
  
-       boot_option_idle_override = 1;
         return 1;
  }
  
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c

index 081eb3fdfe51892ac61df661f8592c34660eb143..c586e4bfec36d955be97e05759a2c39dc6aebc49 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
@@ -142,9 +142,7 @@ struct drive_info_struct { char dummy[32]; } drive_info;
  EXPORT_SYMBOL(drive_info);
  #endif
  struct screen_info screen_info;
-#ifdef CONFIG_VT
  EXPORT_SYMBOL(screen_info);
-#endif
  struct apm_info apm_info;
  EXPORT_SYMBOL(apm_info);
  struct sys_desc_table_struct {
@@ -425,14 +423,24 @@ static void __init limit_regions(unsigned long long size)
                 }
         }
         for (i = 0; i < e820.nr_map; i++) {
-               if (e820.map[i].type == E820_RAM) {
-                       current_addr = e820.map[i].addr + e820.map[i].size;
-                       if (current_addr >= size) {
-                               e820.map[i].size -= current_addr-size;
-                               e820.nr_map = i + 1;
-                               return;
-                       }
+               current_addr = e820.map[i].addr + e820.map[i].size;
+               if (current_addr < size)
+                       continue;
+
+               if (e820.map[i].type != E820_RAM)
+                       continue;
+
+               if (e820.map[i].addr >= size) {
+                       /*
+                        * This region starts past the end of the
+                        * requested size, skip it completely.
+                        */
+                       e820.nr_map = i;
+               } else {
+                       e820.nr_map = i + 1;
+                       e820.map[i].size -= current_addr - size;
                 }
+               return;
         }
  }
  
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c b/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c

index 2c5d9f622b3d3d6be11e40767b21773b64c24789..4cd20871471f14dc54e3df821e9aaadc95d36d7b 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c
@@ -68,15 +68,15 @@ EXPORT_SYMBOL(smp_num_siblings);
  
  /* Package ID of each logical CPU */
  int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(phys_proc_id);
  
  /* Core ID of each logical CPU */
  int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(cpu_core_id);
  
+/* representing HT siblings of each logical CPU */
  cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
  EXPORT_SYMBOL(cpu_sibling_map);
  
+/* representing HT and core siblings of each logical CPU */
  cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
  EXPORT_SYMBOL(cpu_core_map);
  
@@ -87,7 +87,11 @@ EXPORT_SYMBOL(cpu_online_map);
  cpumask_t cpu_callin_map;
  cpumask_t cpu_callout_map;
  EXPORT_SYMBOL(cpu_callout_map);
+#ifdef CONFIG_HOTPLUG_CPU
+cpumask_t cpu_possible_map = CPU_MASK_ALL;
+#else
  cpumask_t cpu_possible_map;
+#endif
  EXPORT_SYMBOL(cpu_possible_map);
  static cpumask_t smp_commenced_mask;
  
@@ -440,35 +444,60 @@ static void __devinit smp_callin(void)
  
  static int cpucount;
  
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
  static inline void
  set_cpu_sibling_map(int cpu)
  {
         int i;
+       struct cpuinfo_x86 *c = cpu_data;
+
+       cpu_set(cpu, cpu_sibling_setup_map);
  
         if (smp_num_siblings > 1) {
-               for (i = 0; i < NR_CPUS; i++) {
-                       if (!cpu_isset(i, cpu_callout_map))
-                               continue;
-                       if (cpu_core_id[cpu] == cpu_core_id[i]) {
+               for_each_cpu_mask(i, cpu_sibling_setup_map) {
+                       if (phys_proc_id[cpu] == phys_proc_id[i] &&
+                           cpu_core_id[cpu] == cpu_core_id[i]) {
                                 cpu_set(i, cpu_sibling_map[cpu]);
                                 cpu_set(cpu, cpu_sibling_map[i]);
+                               cpu_set(i, cpu_core_map[cpu]);
+                               cpu_set(cpu, cpu_core_map[i]);
                         }
                 }
         } else {
                 cpu_set(cpu, cpu_sibling_map[cpu]);
         }
  
-       if (current_cpu_data.x86_num_cores > 1) {
-               for (i = 0; i < NR_CPUS; i++) {
-                       if (!cpu_isset(i, cpu_callout_map))
-                               continue;
-                       if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                               cpu_set(i, cpu_core_map[cpu]);
-                               cpu_set(cpu, cpu_core_map[i]);
-                       }
-               }
-       } else {
+       if (current_cpu_data.x86_max_cores == 1) {
                 cpu_core_map[cpu] = cpu_sibling_map[cpu];
+               c[cpu].booted_cores = 1;
+               return;
+       }
+
+       for_each_cpu_mask(i, cpu_sibling_setup_map) {
+               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                       cpu_set(i, cpu_core_map[cpu]);
+                       cpu_set(cpu, cpu_core_map[i]);
+                       /*
+                        *  Does this new cpu bringup a new core?
+                        */
+                       if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+                               /*
+                                * for each core in package, increment
+                                * the booted_cores for this new cpu
+                                */
+                               if (first_cpu(cpu_sibling_map[i]) == i)
+                                       c[cpu].booted_cores++;
+                               /*
+                                * increment the core count for all
+                                * the other cpus in this package
+                                */
+                               if (i != cpu)
+                                       c[i].booted_cores++;
+                       } else if (i != cpu && !c[cpu].booted_cores)
+                               c[cpu].booted_cores = c[i].booted_cores;
+               }
         }
  }
  
@@ -483,6 +512,7 @@ static void __devinit start_secondary(void *unused)
          * things done here to the most necessary things.
          */
         cpu_init();
+       preempt_disable();
         smp_callin();
         while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                 rep_nop();
@@ -608,7 +638,7 @@ static inline void __inquire_remote_apic(int apicid)
  
         printk("Inquiring remote APIC #%d...\n", apicid);
  
-       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+       for (i = 0; i < ARRAY_SIZE(regs); i++) {
                 printk("... APIC #%d %s: ", apicid, names[i]);
  
                 /*
@@ -1092,11 +1122,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
  
         current_thread_info()->cpu = 0;
         smp_tune_scheduling();
-       cpus_clear(cpu_sibling_map[0]);
-       cpu_set(0, cpu_sibling_map[0]);
  
-       cpus_clear(cpu_core_map[0]);
-       cpu_set(0, cpu_core_map[0]);
+       set_cpu_sibling_map(0);
  
         /*
          * If we couldn't find an SMP configuration at boot time,
@@ -1280,15 +1307,24 @@ static void
  remove_siblinginfo(int cpu)
  {
         int sibling;
+       struct cpuinfo_x86 *c = cpu_data;
  
+       for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+               cpu_clear(cpu, cpu_core_map[sibling]);
+               /*
+                * last thread sibling in this cpu core going down
+                */
+               if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+                       c[sibling].booted_cores--;
+       }
+                       
         for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
                 cpu_clear(cpu, cpu_sibling_map[sibling]);
-       for_each_cpu_mask(sibling, cpu_core_map[cpu])
-               cpu_clear(cpu, cpu_core_map[sibling]);
         cpus_clear(cpu_sibling_map[cpu]);
         cpus_clear(cpu_core_map[cpu]);
         phys_proc_id[cpu] = BAD_APICID;
         cpu_core_id[cpu] = BAD_APICID;
+       cpu_clear(cpu, cpu_sibling_setup_map);
  }
  
  int __cpu_disable(void)
@@ -1307,8 +1343,7 @@ int __cpu_disable(void)
         if (cpu == 0)
                 return -EBUSY;
  
-       /* We enable the timer again on the exit path of the death loop */
-       disable_APIC_timer();
+       clear_local_APIC();
         /* Allow any queued timer interrupts to get serviced */
         local_irq_enable();
         mdelay(1);
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c

index 5639177ffe533cf41408a9907d8ee95c7f591b1e..a93026083d5220fe8a074fd3ca7b7a167109d992 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
@@ -69,8 +69,6 @@
  
  #include <asm/arch_hooks.h>
  
-#include "io_ports.h"
-
  #include <xen/evtchn.h>
  
  #if defined (__i386__)
@@ -79,10 +77,6 @@
  
  int pit_latch_buggy;              /* extern */
  
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
  #if defined(__x86_64__)
  unsigned long vxtime_hz = PIT_TICK_RATE;
  struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c

index 9d5406d0f4990dfe6a9be7ea40a1695ba278c674..dd0ead71f30785b84cb8e3da3d27a134c58de9b3 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c
@@ -627,13 +627,6 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
  
         cpu = smp_processor_id();
  
-#ifdef CONFIG_HOTPLUG_CPU
-       if (!cpu_online(cpu)) {
-               nmi_exit();
-               return;
-       }
-#endif
-
         ++nmi_count(cpu);
  
         if (!rcu_dereference(nmi_callback)(regs, cpu))
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/traps.c b/linux-2.6-xen-sparse/arch/i386/kernel/traps.c

index bd375b69f231eab965b4c686f176e44c85dde7ca..4aa7f5e7092b59f12159249c281eb0bdca4d33be 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/traps.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps.c
@@ -488,6 +488,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
                                 tss->io_bitmap_max - thread->io_bitmap_max);
                 tss->io_bitmap_max = thread->io_bitmap_max;
                 tss->io_bitmap_base = IO_BITMAP_OFFSET;
+               tss->io_bitmap_owner = thread;
                 put_cpu();
                 return;
         }
@@ -642,13 +643,6 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
  
         cpu = smp_processor_id();
  
-#ifdef CONFIG_HOTPLUG_CPU
-       if (!cpu_online(cpu)) {
-               nmi_exit();
-               return;
-       }
-#endif
-
         ++nmi_count(cpu);
  
         if (!rcu_dereference(nmi_callback)(regs, cpu))
diff --git a/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile b/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile

index 08a5c77f88268e11131a85d5936af0b12727365b..eea08a7ec240dfa8909f594ef534971e1e4f2333 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile
@@ -3,5 +3,5 @@
  #
  
  obj-y                          := setup.o topology.o
-
+  
  topology-y                     := ../mach-default/topology.o
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c

index 001185b2e76e80108dd05abd50583309c55f335f..08887a10e774aa94c0369e5a573c9dea4e0ea148 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
@@ -22,7 +22,6 @@
  #include <linux/highmem.h>
  #include <linux/module.h>
  #include <linux/kprobes.h>
-#include <linux/percpu.h>
  
  #include <asm/system.h>
  #include <asm/uaccess.h>
@@ -31,8 +30,6 @@
  
  extern void die(const char *,struct pt_regs *,long);
  
-DEFINE_PER_CPU(pgd_t *, cur_pgd);
-
  /*
   * Unlock any spinlocks which will prevent us from getting the
   * message out 
@@ -111,7 +108,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
                 desc = (void *)desc + (seg & ~7);
         } else {
                 /* Must disable preemption while reading the GDT. */
-               desc = (u32 *)get_cpu_gdt_table(get_cpu());
+               desc = (u32 *)get_cpu_gdt_table(get_cpu());
                 desc = (void *)desc + (seg & ~7);
         }
  
@@ -223,10 +220,7 @@ static void dump_fault_path(unsigned long address)
         unsigned long *p, page;
         unsigned long mfn; 
  
-       preempt_disable();
-       page = __pa(per_cpu(cur_pgd, smp_processor_id()));
-       preempt_enable();
-
+       page = read_cr3();
         p  = (unsigned long *)__va(page);
         p += (address >> 30) * 2;
         printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]);
@@ -256,13 +250,8 @@ static void dump_fault_path(unsigned long address)
  {
         unsigned long page;
  
-       preempt_disable();
-       page = ((unsigned long *) per_cpu(cur_pgd, smp_processor_id()))
-           [address >> 22];
-       preempt_enable();
-
-       page = ((unsigned long *) per_cpu(cur_pgd, get_cpu()))
-           [address >> 22];
+       page = read_cr3();
+       page = ((unsigned long *) __va(page))[address >> 22];
         printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
                machine_to_phys(page));
         /*
@@ -304,8 +293,8 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
         unsigned long address;
         int write, si_code;
  
-       address = HYPERVISOR_shared_info->vcpu_info[
-               smp_processor_id()].arch.cr2;
+       /* get the address */
+        address = read_cr2();
  
         /* Set the "privileged fault" bit to something sane. */
         error_code &= ~4;
@@ -582,14 +571,14 @@ vmalloc_fault:
                  * an interrupt in the middle of a task switch..
                  */
                 int index = pgd_index(address);
+               unsigned long pgd_paddr;
                 pgd_t *pgd, *pgd_k;
                 pud_t *pud, *pud_k;
                 pmd_t *pmd, *pmd_k;
                 pte_t *pte_k;
  
-               preempt_disable();
-               pgd = index + per_cpu(cur_pgd, smp_processor_id());
-               preempt_enable();
+               pgd_paddr = read_cr3();
+               pgd = index + (pgd_t *)__va(pgd_paddr);
                 pgd_k = init_mm.pgd + index;
  
                 if (!pgd_present(*pgd_k))
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c

index b8f1846ae47ca49059c74d3f5b80c40b4787dd89..99f433cde30c00463eba53dbaa6ace8d1ce6afca 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
@@ -27,6 +27,8 @@
  #include <linux/slab.h>
  #include <linux/proc_fs.h>
  #include <linux/efi.h>
+#include <linux/memory_hotplug.h>
+#include <linux/initrd.h>
  
  #include <asm/processor.h>
  #include <asm/system.h>
@@ -313,18 +315,47 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
         pkmap_page_table = pte; 
  }
  
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+static void __devinit free_new_highpage(struct page *page, int pfn)
+{
+       set_page_count(page, 1);
+       if (pfn < xen_start_info->nr_pages)
+               __free_page(page);
+       totalhigh_pages++;
+}
+
+void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
  {
         if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
                 ClearPageReserved(page);
-               set_page_count(page, 1);
-               if (pfn < xen_start_info->nr_pages)
-                       __free_page(page);
-               totalhigh_pages++;
+               free_new_highpage(page, pfn);
         } else
                 SetPageReserved(page);
  }
  
+static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+{
+       free_new_highpage(page, pfn);
+       totalram_pages++;
+#ifdef CONFIG_FLATMEM
+       max_mapnr = max(pfn, max_mapnr);
+#endif
+       num_physpages++;
+       return 0;
+}
+
+/*
+ * Not currently handling the NUMA case.
+ * Assuming single node and all memory that
+ * has been added dynamically that would be
+ * onlined here is in HIGHMEM
+ */
+void online_page(struct page *page)
+{
+       ClearPageReserved(page);
+       add_one_highpage_hotplug(page, page_to_pfn(page));
+}
+
+
  #ifdef CONFIG_NUMA
  extern void set_highmem_pages_init(int);
  #else
@@ -332,7 +363,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
  {
         int pfn;
         for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
-               one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+               add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
         totalram_pages += totalhigh_pages;
  }
  #endif /* CONFIG_FLATMEM */
@@ -359,12 +390,9 @@ static void __init pagetable_init (void)
  {
         unsigned long vaddr;
         pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
-       int i;
  
         swapper_pg_dir = pgd_base;
         init_mm.pgd    = pgd_base;
-       for (i = 0; i < NR_CPUS; i++)
-               per_cpu(cur_pgd, i) = pgd_base;
  
         /* Enable PSE if available */
         if (cpu_has_pse) {
@@ -694,6 +722,28 @@ void __init mem_init(void)
         set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
  }
  
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int add_memory(u64 start, u64 size)
+{
+       struct pglist_data *pgdata = &contig_page_data;
+       struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+       unsigned long start_pfn = start >> PAGE_SHIFT;
+       unsigned long nr_pages = size >> PAGE_SHIFT;
+
+       return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+       return -EINVAL;
+}
+#endif
+
  kmem_cache_t *pgd_cache;
  kmem_cache_t *pmd_cache;
  
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c

index d1b1485246c99636308e097255389a5b5aaf9b4f..9966012aca57e54f49ec78ce3022de3d6a95403a 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
@@ -323,9 +323,15 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
  }
  EXPORT_SYMBOL(ioremap_nocache);
  
+/**
+ * iounmap - Free an I/O remapping
+ * @addr: virtual address from ioremap_*
+ *
+ * Caller must ensure there is only one unmapping for the same pointer.
+ */
  void iounmap(volatile void __iomem *addr)
  {
-       struct vm_struct *p;
+       struct vm_struct *p, *o;
  
         if ((void __force *)addr <= high_memory)
                 return;
@@ -338,14 +344,27 @@ void iounmap(volatile void __iomem *addr)
         if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
                 return;
  
-       write_lock(&vmlist_lock);
-       p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
-       if (!p) { 
-               printk(KERN_WARNING "iounmap: bad address %p\n", addr);
+       addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
+
+       /* Use the vm area unlocked, assuming the caller
+          ensures there isn't another iounmap for the same address
+          in parallel. Reuse of the virtual address is prevented by
+          leaving it in the global lists until we're done with it.
+          cpa takes care of the direct mappings. */
+       read_lock(&vmlist_lock);
+       for (p = vmlist; p; p = p->next) {
+               if (p->addr == addr)
+                       break;
+       }
+       read_unlock(&vmlist_lock);
+
+       if (!p) {
+               printk("iounmap: bad address %p\n", addr);
                 dump_stack();
-               goto out_unlock;
+               return;
         }
  
+       /* Reset the direct mapping. Can block */
         if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
                 /* p->size includes the guard page, but cpa doesn't like that */
                 change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
@@ -353,8 +372,10 @@ void iounmap(volatile void __iomem *addr)
                                  PAGE_KERNEL);
                 global_flush_tlb();
         } 
-out_unlock:
-       write_unlock(&vmlist_lock);
+
+       /* Finally remove it */
+       o = remove_vm_area((void *)addr);
+       BUG_ON(p != o || o == NULL);
         kfree(p); 
  }
  EXPORT_SYMBOL(iounmap);
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c

index 78e944ce297aa18f6302259a7938aea3bc4224c1..41450dff13015a35ee89d137e2fd546dd42e54a2 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
@@ -39,11 +39,13 @@ void show_mem(void)
         pg_data_t *pgdat;
         unsigned long i;
         struct page_state ps;
+       unsigned long flags;
  
         printk(KERN_INFO "Mem-info:\n");
         show_free_areas();
         printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
         for_each_pgdat(pgdat) {
+               pgdat_resize_lock(pgdat, &flags);
                 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                         page = pgdat_page_nr(pgdat, i);
                         total++;
@@ -56,6 +58,7 @@ void show_mem(void)
                         else if (page_count(page))
                                 shared += page_count(page) - 1;
                 }
+               pgdat_resize_unlock(pgdat, &flags);
         }
         printk(KERN_INFO "%d pages of RAM\n", total);
         printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
@@ -267,19 +270,19 @@ static inline void pgd_list_add(pgd_t *pgd)
         struct page *page = virt_to_page(pgd);
         page->index = (unsigned long)pgd_list;
         if (pgd_list)
-               pgd_list->private = (unsigned long)&page->index;
+               set_page_private(pgd_list, (unsigned long)&page->index);
         pgd_list = page;
-       page->private = (unsigned long)&pgd_list;
+       set_page_private(page, (unsigned long)&pgd_list);
  }
  
  static inline void pgd_list_del(pgd_t *pgd)
  {
         struct page *next, **pprev, *page = virt_to_page(pgd);
         next = (struct page *)page->index;
-       pprev = (struct page **)page->private;
+       pprev = (struct page **)page_private(page);
         *pprev = next;
         if (next)
-               next->private = (unsigned long)pprev;
+               set_page_private(next, (unsigned long)pprev);
  }
  
  void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
diff --git a/linux-2.6-xen-sparse/arch/i386/pci/Makefile b/linux-2.6-xen-sparse/arch/i386/pci/Makefile

index 88343ce38384b1bb8f49914a5dcd4d14b2213933..d9bddd419aa498708f0651effcf8577b635e5fca 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/pci/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/pci/Makefile
@@ -1,7 +1,7 @@
  obj-y                          := i386.o
  
  obj-$(CONFIG_PCI_BIOS)         += pcbios.o
-obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig.o
+obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig.o direct.o
  obj-$(CONFIG_PCI_DIRECT)       += direct.o
  
  pci-y                          := fixup.o
diff --git a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c

index 35b4ea48716ea474d65ead0f0707ffd4c9ff4a86..53d08aaca6c5fa41e83e89e62b2d4938cdbdffb1 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
@@ -550,31 +550,48 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
         return 0;
  }
  
-static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+static __init int via_router_probe(struct irq_router *r,
+                               struct pci_dev *router, u16 device)
  {
         /* FIXME: We should move some of the quirk fixup stuff here */
  
-       if (router->device == PCI_DEVICE_ID_VIA_82C686 &&
-                       device == PCI_DEVICE_ID_VIA_82C586_0) {
-               /* Asus k7m bios wrongly reports 82C686A as 586-compatible */
-               device = PCI_DEVICE_ID_VIA_82C686;
+       /*
+        * Workarounds for some buggy BIOSes
+        */
+       if (device == PCI_DEVICE_ID_VIA_82C586_0) {
+               switch(router->device) {
+               case PCI_DEVICE_ID_VIA_82C686:
+                       /*
+                        * Asus k7m bios wrongly reports 82C686A
+                        * as 586-compatible
+                        */
+                       device = PCI_DEVICE_ID_VIA_82C686;
+                       break;
+               case PCI_DEVICE_ID_VIA_8235:
+                       /*
+                        * Asus a7v-x bios wrongly reports 8235
+                        * as 586-compatible
+                        */
+                       device = PCI_DEVICE_ID_VIA_8235;
+                       break;
+               }
         }
  
-       switch(device)
-       {
-               case PCI_DEVICE_ID_VIA_82C586_0:
-                       r->name = "VIA";
-                       r->get = pirq_via586_get;
-                       r->set = pirq_via586_set;
-                       return 1;
-               case PCI_DEVICE_ID_VIA_82C596:
-               case PCI_DEVICE_ID_VIA_82C686:
-               case PCI_DEVICE_ID_VIA_8231:
+       switch(device) {
+       case PCI_DEVICE_ID_VIA_82C586_0:
+               r->name = "VIA";
+               r->get = pirq_via586_get;
+               r->set = pirq_via586_set;
+               return 1;
+       case PCI_DEVICE_ID_VIA_82C596:
+       case PCI_DEVICE_ID_VIA_82C686:
+       case PCI_DEVICE_ID_VIA_8231:
+       case PCI_DEVICE_ID_VIA_8235:
                 /* FIXME: add new ones for 8233/5 */
-                       r->name = "VIA";
-                       r->get = pirq_via_get;
-                       r->set = pirq_via_set;
-                       return 1;
+               r->name = "VIA";
+               r->get = pirq_via_get;
+               r->set = pirq_via_set;
+               return 1;
         }
         return 0;
  }
diff --git a/linux-2.6-xen-sparse/arch/i386/power/Makefile b/linux-2.6-xen-sparse/arch/i386/power/Makefile

new file mode 100644 (file)

index 0000000..e74fee6
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/i386/power/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_PM_LEGACY)                += cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += cpu.o
+obj-$(CONFIG_ACPI_SLEEP)       += cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
diff --git a/linux-2.6-xen-sparse/arch/um/kernel/physmem.c b/linux-2.6-xen-sparse/arch/um/kernel/physmem.c

index ef35c9310fc052d266d352f3d87bb05f42b36b67..7958a2791e0bda61a2f1c33a9662f480107a1743 100644 (file)
--- a/linux-2.6-xen-sparse/arch/um/kernel/physmem.c
+++ b/linux-2.6-xen-sparse/arch/um/kernel/physmem.c
@@ -248,7 +248,7 @@ int is_remapped(void *virt)
  /* Changed during early boot */
  unsigned long high_physmem;
  
-extern unsigned long physmem_size;
+extern unsigned long long physmem_size;
  
  int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
  {
@@ -323,7 +323,7 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
  extern int __syscall_stub_start, __binary_start;
  
  void setup_physmem(unsigned long start, unsigned long reserve_end,
-                  unsigned long len, unsigned long highmem)
+                  unsigned long len, unsigned long long highmem)
  {
         unsigned long reserve = reserve_end - start;
         int pfn = PFN_UP(__pa(reserve_end));
diff --git a/linux-2.6-xen-sparse/arch/x86_64/Kconfig b/linux-2.6-xen-sparse/arch/x86_64/Kconfig

index 5c157c9dabef06c4a119403f44492a74b6e87621..7d36bf4d4e190ca2c894e5769d39cd8fc5821549 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig
@@ -242,22 +242,42 @@ config SCHED_SMT
  
  source "kernel/Kconfig.preempt"
  
-config K8_NUMA
-       bool "K8 NUMA support"
-       select NUMA
+config NUMA
+       bool "Non Uniform Memory Access (NUMA) Support"
         depends on SMP && !X86_64_XEN
         help
-         Enable NUMA (Non Unified Memory Architecture) support for
-         AMD Opteron Multiprocessor systems. The kernel will try to allocate
-         memory used by a CPU on the local memory controller of the CPU
-         and add some more NUMA awareness to the kernel.
-         This code is recommended on all multiprocessor Opteron systems
-         and normally doesn't hurt on others.
+        Enable NUMA (Non Uniform Memory Access) support. The kernel 
+        will try to allocate memory used by a CPU on the local memory 
+        controller of the CPU and add some more NUMA awareness to the kernel.
+        This code is recommended on all multiprocessor Opteron systems.
+        If the system is EM64T, you should say N unless your system is EM64T 
+        NUMA. 
+
+config K8_NUMA
+       bool "Old style AMD Opteron NUMA detection"
+       depends on NUMA
+       default y
+       help
+        Enable K8 NUMA node topology detection.  You should say Y here if
+        you have a multi processor AMD K8 system. This uses an old
+        method to read the NUMA configuration directly from the builtin
+        Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+        instead, which also takes priority if both are compiled in.   
+
+# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
+
+config X86_64_ACPI_NUMA
+       bool "ACPI NUMA detection"
+       depends on NUMA
+       select ACPI 
+       select ACPI_NUMA
+       default y
+       help
+        Enable ACPI SRAT based node topology detection.
  
  config NUMA_EMU
-       bool "NUMA emulation support"
-       select NUMA
-       depends on SMP && !X86_64_XEN
+       bool "NUMA emulation"
+       depends on NUMA
         help
           Enable NUMA emulation. A flat machine will be split
           into virtual nodes when booted with "numa=fake=N", where N is the
@@ -268,9 +288,6 @@ config ARCH_DISCONTIGMEM_ENABLE
         depends on NUMA
         default y
  
-config NUMA
-       bool
-       default n
  
  config ARCH_DISCONTIGMEM_ENABLE
         def_bool y
@@ -393,6 +410,14 @@ config X86_MCE_INTEL
            Additional support for intel specific MCE features such as
            the thermal monitor.
  
+config X86_MCE_AMD
+       bool "AMD MCE features"
+       depends on X86_MCE && X86_LOCAL_APIC
+       default y
+       help
+          Additional support for AMD specific MCE features such as
+          the DRAM Error Threshold.
+
  config PHYSICAL_START
         hex "Physical address where the kernel is loaded" if EMBEDDED
         default "0x100000"
@@ -528,7 +553,7 @@ config IA32_EMULATION
           left.
  
  config IA32_AOUT
-       bool "IA32 a.out support"
+       tristate "IA32 a.out support"
         depends on IA32_EMULATION
         help
           Support old a.out binaries in the 32bit emulation.
@@ -558,8 +583,21 @@ source "drivers/firmware/Kconfig"
  
  source fs/Kconfig
  
+menu "Instrumentation Support"
+        depends on EXPERIMENTAL
+
  source "arch/x86_64/oprofile/Kconfig"
  
+config KPROBES
+       bool "Kprobes (EXPERIMENTAL)"
+       help
+         Kprobes allows you to trap at almost any kernel address and
+         execute a callback function.  register_kprobe() establishes
+         a probepoint and specifies the callback.  Kprobes is useful
+         for kernel debugging, non-intrusive instrumentation and testing.
+         If in doubt, say "N".
+endmenu
+
  source "arch/x86_64/Kconfig.debug"
  
  source "security/Kconfig"
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile

index 4c7e5280839a0106f1a2e541540e09630fa74c5b..bc72d60ac79509e61121e6feddadc1909cfb46f1 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
  
  obj-$(CONFIG_X86_MCE)         += mce.o
  obj-$(CONFIG_X86_MCE_INTEL)    += mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)      += mce_amd.o
  obj-$(CONFIG_MTRR)             += ../../i386/kernel/cpu/mtrr/
  obj-$(CONFIG_ACPI)             += acpi/
  obj-$(CONFIG_X86_MSR)          += msr.o
@@ -22,13 +23,13 @@ obj-$(CONFIG_X86_XEN_GENAPIC)       += genapic.o genapic_xen.o
  obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o \
                 genapic.o genapic_cluster.o genapic_flat.o
  obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o crash.o
-obj-$(CONFIG_PM)               += suspend.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
+obj-$(CONFIG_ACPI_SLEEP)       += suspend.o
  obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
  obj-$(CONFIG_CPU_FREQ)         += cpufreq/
  obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
  obj-$(CONFIG_GART_IOMMU)       += pci-gart.o aperture.o
  obj-$(CONFIG_DUMMY_IOMMU)      += pci-nommu.o pci-dma.o
-obj-$(CONFIG_SWIOTLB)          += swiotlb.o
  obj-$(CONFIG_KPROBES)          += kprobes.o
  obj-$(CONFIG_X86_PM_TIMER)     += pmtimer.o
  
@@ -42,7 +43,6 @@ CFLAGS_vsyscall.o             := $(PROFILING) -g0
  bootflag-y                     += ../../i386/kernel/bootflag.o
  cpuid-$(subst m,y,$(CONFIG_X86_CPUID))  += ../../i386/kernel/cpuid.o
  topology-y                     += ../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB)      += ../../ia64/lib/swiotlb.o
  microcode-$(subst m,y,$(CONFIG_MICROCODE))  += ../../i386/kernel/microcode.o
  intel_cacheinfo-y              += ../../i386/kernel/cpu/intel_cacheinfo.o
  quirks-y                       += ../../i386/kernel/quirks.o
@@ -51,6 +51,7 @@ msr-$(subst m,y,$(CONFIG_X86_MSR))  += ../../i386/kernel/msr.o
  
  ifdef CONFIG_XEN
  time-y                         += ../../i386/kernel/time-xen.o
+obj-$(CONFIG_SWIOTLB)          += swiotlb.o
  swiotlb-$(CONFIG_SWIOTLB)      := ../../i386/kernel/swiotlb.o
  pci-dma-y                      += ../../i386/kernel/pci-dma-xen.o
  microcode-$(subst m,y,$(CONFIG_MICROCODE))  := ../../i386/kernel/microcode-xen.o
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c

index 8b5fb09dc1da59c90e3d533668e4a6e8e5469ef8..12cebc3020292c1c752c18ea1277157d0fc2874e 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
@@ -34,8 +34,6 @@
  #include <asm/arch_hooks.h>
  #include <asm/hpet.h>
  
-#include "io_ports.h"
-
  /*
   * Debug level
   */
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c

index f7b2263e1b9306723936b1e6ae91f9e049caa1ed..a66b628b68978a433c6ca0903eace86fb8a82294 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
@@ -23,6 +23,7 @@
  #include <asm/e820.h>
  #include <asm/proto.h>
  #include <asm/bootsetup.h>
+#include <asm/sections.h>
  #include <xen/interface/memory.h>
  
  unsigned long pci_mem_start = 0xaeedbabe;
@@ -54,7 +55,6 @@ void __init add_memory_region(unsigned long start, unsigned long size, int type)
  }
  
  #ifndef CONFIG_XEN
-extern char _end[];
  
  /* 
   * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S

index 1d6432a656e2f384d3fe549b70da2dfee06a7fd3..e1030282b5248d526107ccb36ba32a54beeaddaf 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
@@ -609,7 +609,14 @@ retint_kernel:
         CFI_ENDPROC
         .endm
  
-#if 0
+#ifndef CONFIG_XEN
+ENTRY(thermal_interrupt)
+       apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+
+ENTRY(threshold_interrupt)
+       apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
+#ifdef CONFIG_SMP      
  ENTRY(reschedule_interrupt)
         apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  
@@ -630,6 +637,7 @@ ENTRY(invalidate_interrupt\num)
  ENTRY(call_function_interrupt)
         apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  #endif
+#endif /* !CONFIG_XEN */
  
  #ifdef CONFIG_X86_LOCAL_APIC   
  ENTRY(apic_timer_interrupt)
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S

index 70002df1804df95c04e46b5a2213c9b0553121e7..0055064587cf8f6881b3a9a2e178082e00cb4f2e 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
@@ -25,6 +25,7 @@
                  
        
  #include <linux/threads.h>
+#include <linux/init.h>
  #include <asm/desc.h>
  #include <asm/segment.h>
  #include <asm/page.h>
@@ -243,6 +244,26 @@ ENTRY(wakeup_level4_pgt)
         .quad   0x0000000000003007 + __PHYSICAL_START   /* -> level3_kernel_pgt */
  #endif
  
+#ifndef CONFIG_XEN
+#ifndef CONFIG_HOTPLUG_CPU
+       __INITDATA
+#endif
+       /*
+        * This default setting generates an ident mapping at address 0x100000
+        * and a mapping for the kernel that precisely maps virtual address
+        * 0xffffffff80000000 to physical address 0x000000. (always using
+        * 2Mbyte large pages provided by PAE mode)
+        */
+       .align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+       .quad   0x0000000000002007 + __PHYSICAL_START   /* -> level3_ident_pgt */
+       .fill   255,8,0
+       .quad   0x000000000000a007 + __PHYSICAL_START
+       .fill   254,8,0
+       /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+       .quad   0x0000000000003007 + __PHYSICAL_START   /* -> level3_kernel_pgt */
+#endif
+
         .data
  
         .align 16
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c

index b6fd05ac824fdd3428e73ece997119e66eea889f..66324f2691f69628931361da5aef10ce410975d8 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
@@ -22,6 +22,8 @@
  #include <asm/bootsetup.h>
  #include <asm/setup.h>
  #include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
  
  unsigned long start_pfn;
  
@@ -30,9 +32,8 @@ unsigned long start_pfn;
  #if 0
  static void __init clear_bss(void)
  {
-       extern char __bss_start[], __bss_end[];
         memset(__bss_start, 0,
-              (unsigned long) __bss_end - (unsigned long) __bss_start);
+              (unsigned long) __bss_stop - (unsigned long) __bss_start);
  }
  #endif
  
@@ -84,8 +85,6 @@ static void __init setup_boot_cpu_data(void)
         boot_cpu_data.x86_mask = eax & 0xf;
  }
  
-extern char _end[];
-
  void __init x86_64_start_kernel(char * real_mode_data)
  {
         int i;
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c

index d9b7b082dd46a4c2fd20302c2fc5d12ced376fb8..b165c70346b95860804b89167942f76803e0ae69 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
   * Rough estimation of how many shared IRQs there are, can
   * be changed anytime.
   */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
  #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
  
  /*
@@ -162,6 +162,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
         static int first_free_entry = NR_IRQS;
         struct irq_pin_list *entry = irq_2_pin + irq;
  
+       BUG_ON(irq >= NR_IRQS);
         while (entry->next)
                 entry = irq_2_pin + entry->next;
  
@@ -169,7 +170,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
                 entry->next = first_free_entry;
                 entry = irq_2_pin + entry->next;
                 if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: whoops");
+                       panic("io_apic.c: ran out of irq_2_pin entries!");
         }
         entry->apic = apic;
         entry->pin = pin;
@@ -182,6 +183,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
         int pin;                                                        \
         struct irq_pin_list *entry = irq_2_pin + irq;                   \
                                                                         \
+       BUG_ON(irq >= NR_IRQS);                                         \
         for (;;) {                                                      \
                 unsigned int reg;                                       \
                 pin = entry->pin;                                       \
@@ -259,6 +261,8 @@ static void clear_IO_APIC (void)
  
  #endif /* !CONFIG_XEN */
  
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
  /*
   * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
   * specific CPU-side IRQs.
@@ -470,6 +474,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
                                 best_guess = irq;
                 }
         }
+       BUG_ON(best_guess >= NR_IRQS);
         return best_guess;
  }
  
@@ -660,6 +665,64 @@ static inline int irq_trigger(int idx)
         return MPBIOS_trigger(idx);
  }
  
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- map a GSI to a Linux IRQ.  Name overload!  "irq" can be
+ * either a legacy IRQ in the range 0-15, a linux IRQ in the range 0-223, or a
+ * GSI number from ACPI, which can reach 800 in large boxen.
+ * A gsi for which platform_legacy_irq() is true is returned as-is; a GSI already
+ * in gsi_2_irq[] returns its cached IRQ.  Otherwise compact the sparse GSI space
+ * into a sequential IRQ series (next_irq) and reuse vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+       int i, tries, vector;
+
+       BUG_ON(gsi >= NR_IRQ_VECTORS);  /* gsi is used to index gsi_2_irq[] below */
+
+       if (platform_legacy_irq(gsi))
+               return gsi;
+
+       if (gsi_2_irq[gsi] != 0xFF)     /* 0xFF is the initial "not yet mapped" value */
+               return (int)gsi_2_irq[gsi];
+
+       tries = NR_IRQS;        /* retry budget for the legacy-IRQ collision case below */
+  try_again:
+       vector = assign_irq_vector(gsi);
+
+       /*
+        * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+        * use of vector and if found, return that IRQ.  However, we never want
+        * to share legacy IRQs, which usually have a different trigger mode
+        * than PCI.
+        */
+       for (i = 0; i < NR_IRQS; i++)
+               if (IO_APIC_VECTOR(i) == vector)
+                       break;
+       if (platform_legacy_irq(i)) {
+               if (--tries >= 0) {
+                       IO_APIC_VECTOR(i) = 0;  /* NOTE(review): zeroes IRQ i's vector entry before retrying -- confirm intent */
+                       goto try_again;
+               }
+               panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+       }
+       if (i < NR_IRQS) {      /* scan stopped early: IRQ i already uses this vector */
+               gsi_2_irq[gsi] = i;
+               printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+                               gsi, vector, i);
+               return i;
+       }
+
+       i = next_irq++;         /* no IRQ uses this vector yet: hand out the next sequential IRQ */
+       BUG_ON(i >= NR_IRQS);
+       gsi_2_irq[gsi] = i;
+       IO_APIC_VECTOR(i) = vector;
+       printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+                       gsi, vector, i);
+       return i;
+}
+
  static int pin_2_irq(int idx, int apic, int pin)
  {
         int irq, i;
@@ -689,6 +752,7 @@ static int pin_2_irq(int idx, int apic, int pin)
                         while (i < apic)
                                 irq += nr_ioapic_registers[i++];
                         irq += pin;
+                       irq = gsi_irq_sharing(irq);
                         break;
                 }
                 default:
@@ -698,6 +762,7 @@ static int pin_2_irq(int idx, int apic, int pin)
                         break;
                 }
         }
+       BUG_ON(irq >= NR_IRQS);
  
         /*
          * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -713,6 +778,7 @@ static int pin_2_irq(int idx, int apic, int pin)
                         }
                 }
         }
+       BUG_ON(irq >= NR_IRQS);
         return irq;
  }
  
@@ -741,7 +807,7 @@ int assign_irq_vector(int irq)
         static int current_vector = FIRST_DEVICE_VECTOR;
         physdev_op_t op;
    
-       BUG_ON(irq >= NR_IRQ_VECTORS);
+       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
         if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
                 return IO_APIC_VECTOR(irq);
  
@@ -1990,6 +2056,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
         entry.polarity = active_high_low;
         entry.mask = 1;                                  /* Disabled (masked) */
  
+       irq = gsi_irq_sharing(irq);
         /*
          * IRQs < 16 are already in the irq_2_pin[] map
          */
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c

index 75959569288b204192806d3a84306876dfdb6a81..c2338421014168594bed09efe130724f034a260c 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
@@ -42,7 +42,7 @@ int acpi_found_madt;
   * Various Linux-internal data structures created from the
   * MP-table.
   */
-int apic_version [MAX_APICS];
+unsigned char apic_version [MAX_APICS];
  unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
  int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
  
@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0;
  /* Processor that is doing the boot up */
  unsigned int boot_cpu_id = -1U;
  /* Internal processor count */
-static unsigned int num_processors = 0;
+unsigned int num_processors __initdata = 0;
+
+unsigned disabled_cpus __initdata;
  
  /* Bitmask of physically existing CPUs */
  physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
@@ -107,11 +109,14 @@ static int __init mpf_checksum(unsigned char *mp, int len)
  #ifndef CONFIG_XEN
  static void __init MP_processor_info (struct mpc_config_processor *m)
  {
-       int ver, cpu;
+       int cpu;
+       unsigned char ver;
         static int found_bsp=0;
  
-       if (!(m->mpc_cpuflag & CPU_ENABLED))
+       if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+               disabled_cpus++;
                 return;
+       }
  
         printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
                 m->mpc_apicid,
@@ -130,12 +135,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
         }
  
         cpu = num_processors++;
-
-       if (m->mpc_apicid > MAX_APICS) {
+       
+#if MAX_APICS < 255    
+       if ((int)m->mpc_apicid > MAX_APICS) {
                 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
                         m->mpc_apicid, MAX_APICS);
                 return;
         }
+#endif
         ver = m->mpc_apicver;
  
         physid_set(m->mpc_apicid, phys_cpu_present_map);
@@ -225,7 +232,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
                         m->mpc_irqtype, m->mpc_irqflag & 3,
                         (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
                         m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+       if (++mp_irq_entries >= MAX_IRQ_SOURCES)
                 panic("Max # of irq sources exceeded!!\n");
  }
  
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c

index 3636461a374cc818ecf6e4254bc191440c8dcb11..6b7972e3f1b0eaa7c6f6857f513fde76b5dff960 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c
@@ -25,7 +25,7 @@ EXPORT_SYMBOL(iommu_sac_force);
   */
  
  void *dma_alloc_coherent(struct device *hwdev, size_t size,
-                        dma_addr_t *dma_handle, unsigned gfp)
+                        dma_addr_t *dma_handle, gfp_t gfp)
  {
         void *ret;
         u64 mask;
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c

index ab3b84ca5df6648dd8b27073b5bc2d73bb83c1be..3c75dd9e75c5e80bfd0dc951677500f575409c96 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
@@ -96,10 +96,13 @@ void xen_idle(void)
         if (need_resched()) {
                 local_irq_enable();
         } else {
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb__after_clear_bit();
                 stop_hz_timer();
                 /* Blocking includes an implicit local_irq_enable(). */
                 HYPERVISOR_sched_op(SCHEDOP_block, 0);
                 start_hz_timer();
+               set_thread_flag(TIF_POLLING_NRFLAG);
         }
  }
  
@@ -143,6 +146,8 @@ static inline void play_dead(void)
   */
  void cpu_idle (void)
  {
+       set_thread_flag(TIF_POLLING_NRFLAG);
+
         /* endless idle loop with no priority at all */
         while (1) {
                 while (!need_resched()) {
@@ -156,7 +161,9 @@ void cpu_idle (void)
                         xen_idle();
                 }
  
+               preempt_enable_no_resched();
                 schedule();
+               preempt_disable();
         }
  }
  
@@ -180,7 +187,8 @@ void cpu_idle_wait(void)
         do {
                 ssleep(1);
                 for_each_online_cpu(cpu) {
-                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                       if (cpu_isset(cpu, map) &&
+                                       !per_cpu(cpu_idle_state, cpu))
                                 cpu_clear(cpu, map);
                 }
                 cpus_and(map, map, cpu_online_map);
@@ -208,7 +216,8 @@ void __show_regs(struct pt_regs * regs)
                 system_utsname.version);
         printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
         printk_address(regs->rip); 
-       printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+       printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+               regs->eflags);
         printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->rax, regs->rbx, regs->rcx);
         printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -282,13 +291,6 @@ void flush_thread(void)
         struct task_struct *tsk = current;
         struct thread_info *t = current_thread_info();
  
-       /*
-        * Remove function-return probe instances associated with this task
-        * and put them back on the free list. Do not insert an exit probe for
-        * this function, it will be disabled by kprobe_flush_task if you do.
-        */
-       kprobe_flush_task(tsk);
-
         if (t->flags & _TIF_ABI_PENDING)
                 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
  
@@ -361,15 +363,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
         struct pt_regs * childregs;
         struct task_struct *me = current;
  
-       childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
-
+       childregs = ((struct pt_regs *)
+                       (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
         *childregs = *regs;
  
         childregs->rax = 0;
         childregs->rsp = rsp;
-       if (rsp == ~0UL) {
+       if (rsp == ~0UL)
                 childregs->rsp = (unsigned long)childregs;
-       }
  
         p->thread.rsp = (unsigned long) childregs;
         p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -391,7 +392,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                         p->thread.io_bitmap_max = 0;
                         return -ENOMEM;
                 }
-               memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+               memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+                               IO_BITMAP_BYTES);
         } 
  
         /*
@@ -432,7 +434,8 @@ static inline void __save_init_fpu( struct task_struct *tsk )
   * - fold all the options into a flag word and test it with a single test.
   * - could test fs/gs bitsliced
   */
-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
  {
         struct thread_struct *prev = &prev_p->thread,
                                  *next = &next_p->thread;
@@ -530,7 +533,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
         prev->userrsp = read_pda(oldrsp); 
         write_pda(oldrsp, next->userrsp); 
         write_pda(pcurrent, next_p); 
-       write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+       write_pda(kernelstack,
+           (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
  
         /*
          * Now maybe reload the debug registers
@@ -591,7 +595,9 @@ asmlinkage long sys_fork(struct pt_regs *regs)
         return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
  }
  
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+asmlinkage long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+         void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
  {
         if (!newsp)
                 newsp = regs->rsp;
@@ -627,7 +633,8 @@ unsigned long get_wchan(struct task_struct *p)
                 return 0;
         fp = *(u64 *)(p->thread.rsp);
         do { 
-               if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+               if (fp < (unsigned long)stack ||
+                   fp > (unsigned long)stack+THREAD_SIZE)
                         return 0; 
                 rip = *(u64 *)(fp+8); 
                 if (!in_sched_functions(rip))
@@ -662,8 +669,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                         task->thread.gsindex = 0;
                         task->thread.gs = addr;
                         if (doit) {
-               load_gs_index(0);
-                ret = HYPERVISOR_set_segment_base(SEGBASE_GS_USER, addr);
+                               load_gs_index(0);
+                               ret = HYPERVISOR_set_segment_base(
+                                       SEGBASE_GS_USER, addr);
                         } 
                 }
                 put_cpu();
@@ -680,7 +688,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                         set_32bit_tls(task, FS_TLS, addr);
                         if (doit) { 
                                 load_TLS(&task->thread, cpu); 
-                               asm volatile("mov %0,%%fs" :: "r" (FS_TLS_SEL));
+                               asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                         }
                         task->thread.fsindex = FS_TLS_SEL;
                         task->thread.fs = 0;
@@ -690,9 +698,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                         if (doit) {
                                 /* set the selector to 0 to not confuse
                                    __switch_to */
-               asm volatile("mov %0,%%fs" :: "r" (0));
-                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS, addr);
-
+                               asm volatile("movl %0,%%fs" :: "r" (0));
+                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
+                                                                 addr);
                         }
                 }
                 put_cpu();
@@ -701,9 +709,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                 unsigned long base; 
                 if (task->thread.fsindex == FS_TLS_SEL)
                         base = read_32bit_tls(task, FS_TLS);
-               else if (doit) {
+               else if (doit)
                         rdmsrl(MSR_FS_BASE, base);
-               } else
+               else
                         base = task->thread.fs;
                 ret = put_user(base, (unsigned long __user *)addr); 
                 break; 
@@ -712,9 +720,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                 unsigned long base;
                 if (task->thread.gsindex == GS_TLS_SEL)
                         base = read_32bit_tls(task, GS_TLS);
-               else if (doit) {
+               else if (doit)
                         rdmsrl(MSR_KERNEL_GS_BASE, base);
-               } else
+               else
                         base = task->thread.gs;
                 ret = put_user(base, (unsigned long __user *)addr); 
                 break;
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c

index 12ac3576d8a3832b1ad71f53f9a86b5bc4fbbc51..d1b68d11e8aea584926d90db1c7d36299259443e 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
@@ -60,6 +60,7 @@
  #include <asm/setup.h>
  #include <asm/mach_apic.h>
  #include <asm/numa.h>
+#include <asm/sections.h>
  #ifdef CONFIG_XEN
  #include <linux/percpu.h>
  #include <xen/interface/physdev.h>
@@ -139,7 +140,6 @@ struct edid_info edid_info;
  struct e820map e820;
  
  extern int root_mountflags;
-extern char _text, _etext, _edata, _end;
  
  char command_line[COMMAND_LINE_SIZE];
  
@@ -473,7 +473,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
  {
         unsigned long bootmap_size, bootmap;
  
-       memory_present(0, start_pfn, end_pfn);
         bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
         bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
         if (bootmap == -1L)
@@ -773,8 +772,6 @@ void __init setup_arch(char **cmdline_p)
         }
  #endif
  
-       sparse_init();
-
         paging_init();
  #ifdef CONFIG_X86_LOCAL_APIC
         /*
@@ -855,6 +852,8 @@ void __init setup_arch(char **cmdline_p)
         check_ioapic();
  #endif
  
+       zap_low_mappings(0);
+
  #ifdef CONFIG_ACPI
         /*
          * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -1032,7 +1031,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
  #endif
  
         bits = 0;
-       while ((1 << bits) < c->x86_num_cores)
+       while ((1 << bits) < c->x86_max_cores)
                 bits++;
  
         /* Low order bits define the core id (index of core in socket) */
@@ -1062,10 +1061,10 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
                 if (!node_online(node))
                         node = nearby_node(apicid);
         }
-       cpu_to_node[cpu] = node;
+       numa_set_node(cpu, node);
  
         printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
-                       cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
+                       cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
  #endif
  #endif
  }
@@ -1114,9 +1113,9 @@ static int __init init_amd(struct cpuinfo_x86 *c)
         display_cacheinfo(c);
  
         if (c->extended_cpuid_level >= 0x80000008) {
-               c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-               if (c->x86_num_cores & (c->x86_num_cores - 1))
-                       c->x86_num_cores = 1;
+               c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+               if (c->x86_max_cores & (c->x86_max_cores - 1))
+                       c->x86_max_cores = 1;
  
                 amd_detect_cmp(c);
         }
@@ -1128,54 +1127,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
  {
  #ifdef CONFIG_SMP
         u32     eax, ebx, ecx, edx;
-       int     index_msb, tmp;
+       int     index_msb, core_bits;
         int     cpu = smp_processor_id();
-       
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+
+       c->apicid = phys_pkg_id(0);
+
         if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                 return;
  
-       cpuid(1, &eax, &ebx, &ecx, &edx);
         smp_num_siblings = (ebx & 0xff0000) >> 16;
-       
+
         if (smp_num_siblings == 1) {
                 printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-       } else if (smp_num_siblings > 1) {
-               index_msb = 31;
-               /*
-                * At this point we only support two siblings per
-                * processor package.
-                */
+       } else if (smp_num_siblings > 1 ) {
+
                 if (smp_num_siblings > NR_CPUS) {
                         printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
                         smp_num_siblings = 1;
                         return;
                 }
-               tmp = smp_num_siblings;
-               while ((tmp & 0x80000000 ) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+
+               index_msb = get_count_order(smp_num_siblings);
                 phys_proc_id[cpu] = phys_pkg_id(index_msb);
-               
+
                 printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                        phys_proc_id[cpu]);
  
-               smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
  
-               tmp = smp_num_siblings;
-               index_msb = 31;
-               while ((tmp & 0x80000000) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+               index_msb = get_count_order(smp_num_siblings) ;
  
-               cpu_core_id[cpu] = phys_pkg_id(index_msb);
+               core_bits = get_count_order(c->x86_max_cores);
  
-               if (c->x86_num_cores > 1)
+               cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+                                              ((1 << core_bits) - 1);
+
+               if (c->x86_max_cores > 1)
                         printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
                                cpu_core_id[cpu]);
         }
@@ -1214,7 +1203,7 @@ static void srat_detect_node(void)
         node = apicid_to_node[hard_smp_processor_id()];
         if (node == NUMA_NO_NODE)
                 node = 0;
-       cpu_to_node[cpu] = node;
+       numa_set_node(cpu, node);
  
         if (acpi_numa > 0)
                 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
@@ -1232,13 +1221,18 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
                 unsigned eax = cpuid_eax(0x80000008);
                 c->x86_virt_bits = (eax >> 8) & 0xff;
                 c->x86_phys_bits = eax & 0xff;
+               /* CPUID workaround for Intel 0F34 CPU */
+               if (c->x86_vendor == X86_VENDOR_INTEL &&
+                   c->x86 == 0xF && c->x86_model == 0x3 &&
+                   c->x86_mask == 0x4)
+                       c->x86_phys_bits = 36;
         }
  
         if (c->x86 == 15)
                 c->x86_cache_alignment = c->x86_clflush_size * 2;
         if (c->x86 >= 15)
                 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
-       c->x86_num_cores = intel_num_cpu_cores(c);
+       c->x86_max_cores = intel_num_cpu_cores(c);
  
         srat_detect_node();
  }
@@ -1276,7 +1270,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
         c->x86_model_id[0] = '\0';  /* Unset */
         c->x86_clflush_size = 64;
         c->x86_cache_alignment = c->x86_clflush_size;
-       c->x86_num_cores = 1;
+       c->x86_max_cores = 1;
         c->extended_cpuid_level = 0;
         memset(&c->x86_capability, 0, sizeof c->x86_capability);
  
@@ -1299,10 +1293,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
                 c->x86 = (tfms >> 8) & 0xf;
                 c->x86_model = (tfms >> 4) & 0xf;
                 c->x86_mask = tfms & 0xf;
-               if (c->x86 == 0xf) {
+               if (c->x86 == 0xf)
                         c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
                         c->x86_model += ((tfms >> 16) & 0xF) << 4;
-               } 
                 if (c->x86_capability[0] & (1<<19)) 
                         c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
         } else {
@@ -1452,7 +1446,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  
                 /* Intel-defined (#2) */
-               "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est",
+               "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
                 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1510,13 +1504,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
         
  #ifdef CONFIG_SMP
-       if (smp_num_siblings * c->x86_num_cores > 1) {
+       if (smp_num_siblings * c->x86_max_cores > 1) {
                 int cpu = c - cpu_data;
                 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
-               seq_printf(m, "siblings\t: %d\n",
-                               c->x86_num_cores * smp_num_siblings);
+               seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
                 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
-               seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+               seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
         }
  #endif 
  
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c

index 80442611bc53a1d6de5c306f350ccac03aac617f..0e202d2888fb84f3a8565e9f0c0c1d84f46c5b6f 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
@@ -255,6 +255,7 @@ void __cpuinit cpu_init (void)
         /* CPU 0 is initialised in head64.c */
         if (cpu != 0) {
                 pda_init(cpu);
+               zap_low_mappings(cpu);
         } else 
                 estacks = boot_exception_stacks; 
  
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c

index bea51ed8afb798e8136130d823819c2e9ea99a01..22db7bb8e8956cb6b42b9278ec791c9f8f543ac4 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
@@ -71,8 +71,6 @@ union smp_flush_state {
  static DEFINE_PER_CPU(union smp_flush_state, flush_state);
  #endif
  
-#define __cpuinit __init
-
  /*
   * We cannot call mmdrop() because we are in interrupt context, 
   * instead update mm->cpu_vm_mask.
@@ -478,15 +476,16 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
  
  void smp_stop_cpu(void)
  {
+       unsigned long flags;
         /*
          * Remove this CPU:
          */
         cpu_clear(smp_processor_id(), cpu_online_map);
-       local_irq_disable();
+       local_irq_save(flags);
  #ifndef CONFIG_XEN
         disable_local_APIC();
  #endif
-       local_irq_enable(); 
+       local_irq_restore(flags); 
  }
  
  static void smp_really_stop_cpu(void *dummy)
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c

index f99eb0e2b92c80962c47195989f1d6d33ada4904..4d4b9c61c535c8eec5714af9c1630c692fc6c2be 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
@@ -430,19 +430,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
  {
         conditional_sti(regs);
  
-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
-                             regs->rip);
-               }
-       }
-#endif
-
         if (user_mode(regs)) {
                 struct task_struct *tsk = current;
  
@@ -515,20 +502,6 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
  {
         conditional_sti(regs);
  
-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                      oops_in_progress++;
-                       printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
-                      oops_in_progress--;
-               }
-       }
-#endif
-
         if (user_mode(regs)) {
                 struct task_struct *tsk = current;
  
@@ -671,19 +644,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
         struct task_struct *tsk = current;
         siginfo_t info;
  
-#ifdef CONFIG_CHECKING
-       { 
-              /* RED-PEN interaction with debugger - could destroy gs */
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
-               }
-       }
-#endif
-
         get_debugreg(condition, 6);
  
         if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
@@ -896,6 +856,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
  }
  #endif
  
+asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+{      /* weak, intentionally empty default: a non-weak definition, if linked in, replaces it */
+}
+
  /*
   *  'math_state_restore()' saves the current math information in the
   * old math state array, and gets the new ones from the current task
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c

index ea99a442f09baccf18e46ea59b7db9b02750e078..5eb279f22dadd0ee2a5c5f70695e785aefce19f7 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
@@ -204,3 +204,6 @@ EXPORT_SYMBOL(flush_tlb_page);
  #endif
  
  EXPORT_SYMBOL(cpu_khz);
+
+EXPORT_SYMBOL(load_gs_index);
+
diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c

index 5948250abe63f2a66ba305316d4560d89e69f21c..62f0bd6dfed130f50a2e8f15cfd015d177a8b0da 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
@@ -24,7 +24,6 @@
  #include <linux/compiler.h>
  #include <linux/module.h>
  #include <linux/kprobes.h>
-#include <linux/percpu.h>
  
  #include <asm/system.h>
  #include <asm/uaccess.h>
@@ -36,8 +35,6 @@
  #include <asm-generic/sections.h>
  #include <asm/kdebug.h>
  
-DEFINE_PER_CPU(pgd_t *, cur_pgd);
-
  void bust_spinlocks(int yes)
  {
         int loglevel_save = console_loglevel;
@@ -149,9 +146,10 @@ void dump_pagetable(unsigned long address)
         pmd_t *pmd;
         pte_t *pte;
  
-       preempt_disable();
-       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
-       preempt_enable();
+       asm("movq %%cr3,%0" : "=r" (pgd));
+       pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
+
+       pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); 
         pgd += pgd_index(address);
         printk("PGD %lx ", pgd_val(*pgd));
         if (bad_address(pgd)) goto bad;
@@ -252,9 +250,9 @@ static int vmalloc_fault(unsigned long address)
  
         /* On Xen the line below does not always work. Needs investigating! */
         /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
-       preempt_disable();
-       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
-       preempt_enable();
+       asm("movq %%cr3,%0" : "=r" (pgd));
+       pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
+       pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
         pgd += pgd_index(address);
         pgd_ref = pgd_offset_k(address);
         if (pgd_none(*pgd_ref))
@@ -330,22 +328,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
         if (!user_mode(regs))
                 error_code &= ~4; /* means kernel */
  
-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("page_fault: wrong gs %lx expected %p\n", gs, pda);
-               }
-       }
-#endif
-
         /* get the address */
         address = HYPERVISOR_shared_info->vcpu_info[
                 smp_processor_id()].arch.cr2;
-
         if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
                                         SIGSEGV) == NOTIFY_STOP)
                 return;
@@ -595,3 +580,10 @@ do_sigbus:
         force_sig_info(SIGBUS, &info, tsk);
         return;
  }
+
+static int __init enable_pagefaulttrace(char *str)
+{
+       page_fault_trace = 1;
+       return 0;
+}
+__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c

index 54af9798f9df06c27dd5257a72d0410a44042b11..b89a416c6426e28362c154c30f1f909a590d5c06 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
@@ -25,6 +25,7 @@
  #include <linux/pagemap.h>
  #include <linux/bootmem.h>
  #include <linux/proc_fs.h>
+#include <linux/pci.h>
  
  #include <asm/processor.h>
  #include <asm/system.h>
@@ -39,6 +40,7 @@
  #include <asm/mmu_context.h>
  #include <asm/proto.h>
  #include <asm/smp.h>
+#include <asm/sections.h>
  
  #include <xen/features.h>
  
@@ -52,7 +54,7 @@ extern unsigned long *contiguous_bitmap;
  extern void swiotlb_init(void);
  #endif
  
-extern char _stext[];
+static unsigned long dma_reserve __initdata;
  
  DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
  extern unsigned long start_pfn;
@@ -194,9 +196,6 @@ void show_mem(void)
  
  /* References to section boundaries */
  
-extern char _text, _etext, _edata, __bss_start, _end[];
-extern char __init_begin, __init_end;
-
  int after_bootmem;
  
  static void *spp_getpage(void)
@@ -492,10 +491,6 @@ static void __init find_early_table_space(unsigned long end)
  void __init xen_init_pt(void)
  {
         unsigned long addr, *page;
-       int i;
-
-       for (i = 0; i < NR_CPUS; i++)
-               per_cpu(cur_pgd, i) = init_mm.pgd;
  
         memset((void *)init_level4_pgt,   0, PAGE_SIZE);
         memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
@@ -628,9 +623,7 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
         __flush_tlb_all();
  }
  
-extern struct x8664_pda cpu_pda[NR_CPUS];
-
-void zap_low_mappings(void)
+void __cpuinit zap_low_mappings(int cpu)
  {
         /* this is not required for Xen */
  #if 0
@@ -638,32 +631,74 @@ void zap_low_mappings(void)
  #endif
  }
  
-#ifndef CONFIG_NUMA
-void __init paging_init(void)
+/* Compute zone sizes for the DMA and DMA32 zones in a node. */
+__init void
+size_zones(unsigned long *z, unsigned long *h,
+          unsigned long start_pfn, unsigned long end_pfn)
  {
-       {
-               unsigned long zones_size[MAX_NR_ZONES];
-               unsigned long holes[MAX_NR_ZONES];
-               /*      unsigned int max_dma; */
+       int i;
+#ifndef CONFIG_XEN
+       unsigned long w;
+#endif
  
-               memset(zones_size, 0, sizeof(zones_size));
-               memset(holes, 0, sizeof(holes));
+       for (i = 0; i < MAX_NR_ZONES; i++)
+               z[i] = 0;
+
+#ifndef CONFIG_XEN
+       if (start_pfn < MAX_DMA_PFN)
+               z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
+       if (start_pfn < MAX_DMA32_PFN) {
+               unsigned long dma32_pfn = MAX_DMA32_PFN;
+               if (dma32_pfn > end_pfn)
+                       dma32_pfn = end_pfn;
+               z[ZONE_DMA32] = dma32_pfn - start_pfn;
+       }
+       z[ZONE_NORMAL] = end_pfn - start_pfn;
+
+       /* Remove lower zones from higher ones. */
+       w = 0;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               if (z[i])
+                       z[i] -= w;
+               w += z[i];
+       }
  
-               /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
-               /* if (end_pfn < max_dma) { */
-                       zones_size[ZONE_DMA] = end_pfn;
-#if 0
-                       holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
-               } else {
-                       zones_size[ZONE_DMA] = max_dma;
-                       holes[ZONE_DMA] = e820_hole_size(0, max_dma);
-                       zones_size[ZONE_NORMAL] = end_pfn - max_dma;
-                       holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
+       /* Compute holes */
+       w = start_pfn;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               unsigned long s = w;
+               w += z[i];
+               h[i] = e820_hole_size(s, w);
+       }
+
+       /* Add the space needed for mem_map to the holes too. */
+       for (i = 0; i < MAX_NR_ZONES; i++)
+               h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
+
+       /* The 16MB DMA zone has the kernel and other misc mappings.
+          Account them too */
+       if (h[ZONE_DMA]) {
+               h[ZONE_DMA] += dma_reserve;
+               if (h[ZONE_DMA] >= z[ZONE_DMA]) {
+                       printk(KERN_WARNING
+                               "Kernel too large and filling up ZONE_DMA?\n");
+                       h[ZONE_DMA] = z[ZONE_DMA];
                 }
-#endif
-               free_area_init_node(0, NODE_DATA(0), zones_size,
-                                   __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
         }
+#else
+       z[ZONE_DMA] = end_pfn;
+       for (i = 0; i < MAX_NR_ZONES; i++)
+               h[i] = 0;
+#endif
+}
+
+#ifndef CONFIG_NUMA
+void __init paging_init(void)
+{
+       unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+       size_zones(zones, holes, 0, end_pfn);
+       free_area_init_node(0, NODE_DATA(0), zones,
+                           __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
  
         set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
         HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
@@ -780,19 +815,18 @@ void __init mem_init(void)
                 datasize >> 10,
                 initsize >> 10);
  
+#ifndef CONFIG_XEN
+#ifdef CONFIG_SMP
         /*
-        * Subtle. SMP is doing its boot stuff late (because it has to
-        * fork idle threads) - but it also needs low mappings for the
-        * protected-mode entry to work. We zap these entries only after
-        * the WP-bit has been tested.
+        * Sync boot_level4_pgt mappings with the init_level4_pgt
+        * except for the low identity mappings which are already zapped
+        * in init_level4_pgt. This sync-up is essential for AP's bringup
          */
-#ifndef CONFIG_SMP
-       zap_low_mappings();
+       memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
+#endif
  #endif
  }
  
-extern char __initdata_begin[], __initdata_end[];
-
  void free_initmem(void)
  {
  #ifdef __DO_LATER__
@@ -819,7 +853,7 @@ void free_initmem(void)
                 totalram_pages++;
         }
         memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
-       printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
+       printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
  #endif
  }
  
@@ -847,6 +881,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
  #else                  
         reserve_bootmem(phys, len);    
  #endif
+       if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
+               dma_reserve += len / PAGE_SIZE;
  }
  
  int kern_addr_valid(unsigned long addr) 
@@ -888,10 +924,6 @@ extern int exception_trace, page_fault_trace;
  static ctl_table debug_table2[] = {
         { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
           proc_dointvec },
-#ifdef CONFIG_CHECKING
-       { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
-         proc_dointvec },
-#endif
         { 0, }
  }; 
  
diff --git a/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile b/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile

index 2726fc318a2252664354ee09f64f9e4bb72df0f6..ab006dd36b46d02c9960655d2e47c8f7ed06d101 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile
@@ -11,7 +11,7 @@ obj-y         += fixup.o
  obj-$(CONFIG_ACPI)     += acpi.o
  obj-y                  += legacy.o irq.o common.o
  # mmconfig has a 64bit special
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
  
  obj-$(CONFIG_NUMA)     += k8-bus.o
  
diff --git a/linux-2.6-xen-sparse/drivers/Makefile b/linux-2.6-xen-sparse/drivers/Makefile

index 3a9ebb31d571cee1cccef7e3107a2b69239cd29f..e4c0be1d3fd303509a8f8eb0b4e8ce323f58e053 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/Makefile
+++ b/linux-2.6-xen-sparse/drivers/Makefile
@@ -7,6 +7,7 @@
  
  obj-$(CONFIG_PCI)              += pci/
  obj-$(CONFIG_PARISC)           += parisc/
+obj-$(CONFIG_RAPIDIO)          += rapidio/
  obj-y                          += video/
  obj-$(CONFIG_ACPI)             += acpi/
  # PnP must come after ACPI since it will eventually need to check if acpi
@@ -49,6 +50,7 @@ obj-$(CONFIG_ATA_OVER_ETH)    += block/aoe/
  obj-$(CONFIG_PARIDE)           += block/paride/
  obj-$(CONFIG_TC)               += tc/
  obj-$(CONFIG_USB)              += usb/
+obj-$(CONFIG_PCI)              += usb/
  obj-$(CONFIG_USB_GADGET)       += usb/gadget/
  obj-$(CONFIG_GAMEPORT)         += input/gameport/
  obj-$(CONFIG_INPUT)            += input/
@@ -68,3 +70,4 @@ obj-$(CONFIG_INFINIBAND)      += infiniband/
  obj-$(CONFIG_SGI_IOC4)         += sn/
  obj-y                          += firmware/
  obj-$(CONFIG_CRYPTO)           += crypto/
+obj-$(CONFIG_SUPERH)           += sh/
diff --git a/linux-2.6-xen-sparse/drivers/acpi/Kconfig b/linux-2.6-xen-sparse/drivers/acpi/Kconfig

index f726f37644577e1c49a5a488d89104443cdffd0e..cb31281eaa14965b74eabab5c4f57f59752a1a04 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/acpi/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/acpi/Kconfig
@@ -197,7 +197,6 @@ config ACPI_ASUS
  config ACPI_IBM
         tristate "IBM ThinkPad Laptop Extras"
         depends on X86
-       default y
         ---help---
           This is a Linux ACPI driver for the IBM ThinkPad laptops. It adds
           support for Fn-Fx key combinations, Bluetooth control, video
diff --git a/linux-2.6-xen-sparse/drivers/char/mem.c b/linux-2.6-xen-sparse/drivers/char/mem.c

index 80b6544c177425c8fcc11272eb80f893eca6e80b..c298422b35889ebad28ef845ae7e2a326bb9065c 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/char/mem.c
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c
@@ -233,9 +233,7 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
  static int mmap_mem(struct file * file, struct vm_area_struct * vma)
  {
  #if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-
-       vma->vm_page_prot = phys_mem_access_prot(file, offset,
+       vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
                                                  vma->vm_end - vma->vm_start,
                                                  vma->vm_page_prot);
  #elif defined(pgprot_noncached)
@@ -926,7 +924,8 @@ static int __init chr_dev_init(void)
  
         mem_class = class_create(THIS_MODULE, "mem");
         for (i = 0; i < ARRAY_SIZE(devlist); i++) {
-               class_device_create(mem_class, MKDEV(MEM_MAJOR, devlist[i].minor),
+               class_device_create(mem_class, NULL,
+                                       MKDEV(MEM_MAJOR, devlist[i].minor),
                                         NULL, devlist[i].name);
                 devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
                                 S_IFCHR | devlist[i].mode, devlist[i].name);
diff --git a/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig b/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig

index fe6ee3793dc4211a7ec2817643c2fea2825b537c..a0879aed69335b587a7d4281202645e618297ab0 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig
@@ -6,7 +6,7 @@ menu "TPM devices"
  
  config TCG_TPM
         tristate "TPM Hardware Support"
-       depends on EXPERIMENTAL && (PCI || XEN)
+       depends on EXPERIMENTAL
         ---help---
           If you have a TPM security chip in your system, which
           implements the Trusted Computing Group's specification,
diff --git a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c

index 8f79ec03485ac82d306f8bccf1937a7e556a112f..53018cbde1afe785573c6782b54c1639cb26e029 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c
@@ -47,6 +47,13 @@ static void user_reader_timeout(unsigned long ptr)
  {
         struct tpm_chip *chip = (struct tpm_chip *) ptr;
  
+       schedule_work(&chip->work);
+}
+
+static void timeout_work(void * ptr)
+{
+       struct tpm_chip *chip = ptr;
+
         down(&chip->buffer_mutex);
         atomic_set(&chip->data_pending, 0);
         memset(chip->data_buffer, 0, chip->vendor->buffersize);
@@ -146,8 +153,7 @@ ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr,
         __be32 index;
         char *str = buf;
  
-       struct tpm_chip *chip =
-           pci_get_drvdata(to_pci_dev(dev));
+       struct tpm_chip *chip = dev_get_drvdata(dev);
         if (chip == NULL)
                 return -ENODEV;
  
@@ -170,7 +176,8 @@ ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr,
                     < READ_PCR_RESULT_SIZE){
                         dev_dbg(chip->dev, "A TPM error (%d) occurred"
                                 " attempting to read PCR %d of %d\n",
-                               be32_to_cpu(*((__be32 *) (data + 6))), i, num_pcrs);
+                               be32_to_cpu(*((__be32 *) (data + 6))),
+                               i, num_pcrs);
                         goto out;
                 }
                 str += sprintf(str, "PCR-%02d: ", i);
@@ -198,17 +205,15 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
         int i, rc;
         char *str = buf;
  
-       struct tpm_chip *chip =
-           pci_get_drvdata(to_pci_dev(dev));
+       struct tpm_chip *chip = dev_get_drvdata(dev);
         if (chip == NULL)
                 return -ENODEV;
  
-       data = kmalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL);
+       data = kzalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL);
         if (!data)
                 return -ENOMEM;
  
         memcpy(data, readpubek, sizeof(readpubek));
-       memset(data + sizeof(readpubek), 0, 20);        /* zero nonce */
  
         if ((len = tpm_transmit(chip, data, READ_PUBEK_RESULT_SIZE)) <
             READ_PUBEK_RESULT_SIZE) {
@@ -252,7 +257,6 @@ out:
         kfree(data);
         return rc;
  }
-
  EXPORT_SYMBOL_GPL(tpm_show_pubek);
  
  #define CAP_VER_RESULT_SIZE 18
@@ -281,8 +285,7 @@ ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr,
         ssize_t len;
         char *str = buf;
  
-       struct tpm_chip *chip =
-           pci_get_drvdata(to_pci_dev(dev));
+       struct tpm_chip *chip = dev_get_drvdata(dev);
         if (chip == NULL)
                 return -ENODEV;
  
@@ -322,7 +325,6 @@ ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
  }
  EXPORT_SYMBOL_GPL(tpm_store_cancel);
  
-
  /*
   * Device file system interface to the TPM
   */
@@ -346,8 +348,7 @@ int tpm_open(struct inode *inode, struct file *file)
         }
  
         if (chip->num_opens) {
-               dev_dbg(chip->dev,
-                       "Another process owns this TPM\n");
+               dev_dbg(chip->dev, "Another process owns this TPM\n");
                 rc = -EBUSY;
                 goto err_out;
         }
@@ -373,7 +374,6 @@ err_out:
         spin_unlock(&driver_lock);
         return rc;
  }
-
  EXPORT_SYMBOL_GPL(tpm_open);
  
  int tpm_release(struct inode *inode, struct file *file)
@@ -384,16 +384,16 @@ int tpm_release(struct inode *inode, struct file *file)
         file->private_data = NULL;
         chip->num_opens--;
         del_singleshot_timer_sync(&chip->user_read_timer);
+       flush_scheduled_work();
         atomic_set(&chip->data_pending, 0);
         put_device(chip->dev);
         kfree(chip->data_buffer);
         spin_unlock(&driver_lock);
         return 0;
  }
-
  EXPORT_SYMBOL_GPL(tpm_release);
  
-ssize_t tpm_write(struct file * file, const char __user * buf,
+ssize_t tpm_write(struct file *file, const char __user *buf,
                   size_t size, loff_t * off)
  {
         struct tpm_chip *chip = file->private_data;
@@ -431,13 +431,14 @@ ssize_t tpm_write(struct file * file, const char __user * buf,
  
  EXPORT_SYMBOL_GPL(tpm_write);
  
-ssize_t tpm_read(struct file * file, char __user * buf,
+ssize_t tpm_read(struct file * file, char __user *buf,
                  size_t size, loff_t * off)
  {
         struct tpm_chip *chip = file->private_data;
         int ret_size;
         int pos, pending = 0;
  
+       flush_scheduled_work();
         ret_size = atomic_read(&chip->data_pending);
         if (ret_size > 0) {     /* relay data */
                 if (size < ret_size)
@@ -446,8 +447,7 @@ ssize_t tpm_read(struct file * file, char __user * buf,
                 pos = atomic_read(&chip->data_position);
  
                 down(&chip->buffer_mutex);
-               if (copy_to_user
-                   ((void __user *) buf, &chip->data_buffer[pos], ret_size)) {
+               if (copy_to_user(buf, &chip->data_buffer[pos], ret_size)) {
                         ret_size = -EFAULT;
                 } else {
                         pending = atomic_read(&chip->data_pending) - ret_size;
@@ -466,7 +466,6 @@ ssize_t tpm_read(struct file * file, char __user * buf,
  
         return ret_size;
  }
-
  EXPORT_SYMBOL_GPL(tpm_read);
  
  void tpm_remove_hardware(struct device *dev)
@@ -490,13 +489,13 @@ void tpm_remove_hardware(struct device *dev)
  
         sysfs_remove_group(&dev->kobj, chip->vendor->attr_group);
  
-       dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &= !(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
+       dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &=
+               ~(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
  
         kfree(chip);
  
         put_device(dev);
  }
-
  EXPORT_SYMBOL_GPL(tpm_remove_hardware);
  
  static u8 savestate[] = {
@@ -509,32 +508,30 @@ static u8 savestate[] = {
   * We are about to suspend. Save the TPM state
   * so that it can be restored.
   */
-int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state)
+int tpm_pm_suspend(struct device *dev, pm_message_t pm_state)
  {
-       struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
         if (chip == NULL)
                 return -ENODEV;
  
         tpm_transmit(chip, savestate, sizeof(savestate));
         return 0;
  }
-
  EXPORT_SYMBOL_GPL(tpm_pm_suspend);
  
  /*
   * Resume from a power safe. The BIOS already restored
   * the TPM state.
   */
-int tpm_pm_resume(struct pci_dev *pci_dev)
+int tpm_pm_resume(struct device *dev)
  {
-       struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
  
         if (chip == NULL)
                 return -ENODEV;
  
         return 0;
  }
-
  EXPORT_SYMBOL_GPL(tpm_pm_resume);
  
  /*
@@ -544,8 +541,7 @@ EXPORT_SYMBOL_GPL(tpm_pm_resume);
   * upon errant exit from this function specific probe function should call
   * pci_disable_device
   */
-int tpm_register_hardware(struct device *dev,
-                         struct tpm_vendor_specific *entry)
+int tpm_register_hardware(struct device *dev, struct tpm_vendor_specific *entry)
  {
  #define DEVNAME_SIZE 7
  
@@ -554,16 +550,16 @@ int tpm_register_hardware(struct device *dev,
         int i, j;
  
         /* Driver specific per-device data */
-       chip = kmalloc(sizeof(*chip), GFP_KERNEL);
+       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
         if (chip == NULL)
                 return -ENOMEM;
  
-       memset(chip, 0, sizeof(struct tpm_chip));
-
         init_MUTEX(&chip->buffer_mutex);
         init_MUTEX(&chip->tpm_mutex);
         INIT_LIST_HEAD(&chip->list);
  
+       INIT_WORK(&chip->work, timeout_work, chip);
+
         init_timer(&chip->user_read_timer);
         chip->user_read_timer.function = user_reader_timeout;
         chip->user_read_timer.data = (unsigned long) chip;
@@ -589,8 +585,7 @@ int tpm_register_hardware(struct device *dev,
  
  dev_num_search_complete:
         if (chip->dev_num < 0) {
-               dev_err(dev,
-                       "No available tpm device numbers\n");
+               dev_err(dev, "No available tpm device numbers\n");
                 kfree(chip);
                 return -ENODEV;
         } else if (chip->dev_num == 0)
@@ -628,7 +623,6 @@ dev_num_search_complete:
  
         return 0;
  }
-
  EXPORT_SYMBOL_GPL(tpm_register_hardware);
  
  MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
diff --git a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h

index cd2535c525e800074869283a4668bd571faf59c3..2b3b9728fdc5d267262b46da6b8255cafc22c5c2 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h
@@ -19,11 +19,11 @@
   * 
   */
  #include <linux/module.h>
-#include <linux/version.h>
  #include <linux/pci.h>
  #include <linux/delay.h>
  #include <linux/fs.h>
  #include <linux/miscdevice.h>
+#include <linux/platform_device.h>
  
  enum tpm_timeout {
         TPM_TIMEOUT = 5,        /* msecs */
@@ -76,6 +76,7 @@ struct tpm_chip {
         struct semaphore buffer_mutex;
  
         struct timer_list user_read_timer;      /* user needs to claim result */
+       struct work_struct work;
         struct semaphore tpm_mutex;     /* tpm is processing */
  
         struct tpm_vendor_specific *vendor;
diff --git a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c

index 953acd771fad20999abfca51fa201445a50331af..4238693b37fb1abd12e8698072be463fadea67ed 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c
@@ -20,12 +20,7 @@
   */
  
  #include "tpm.h"
-
-/* Atmel definitions */
-enum tpm_atmel_addr {
-       TPM_ATMEL_BASE_ADDR_LO = 0x08,
-       TPM_ATMEL_BASE_ADDR_HI = 0x09
-};
+#include "tpm_atmel.h"
  
  /* write status bits */
  enum tpm_atmel_write_status {
@@ -40,7 +35,7 @@ enum tpm_atmel_read_status {
         ATML_STATUS_READY = 0x08
  };
  
-static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
+static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count)
  {
         u8 status, *hdr = buf;
         u32 size;
@@ -52,13 +47,12 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
                 return -EIO;
  
         for (i = 0; i < 6; i++) {
-               status = inb(chip->vendor->base + 1);
+               status = atmel_getb(chip, 1);
                 if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                       dev_err(chip->dev,
-                               "error reading header\n");
+                       dev_err(chip->dev, "error reading header\n");
                         return -EIO;
                 }
-               *buf++ = inb(chip->vendor->base);
+               *buf++ = atmel_getb(chip, 0);
         }
  
         /* size of the data received */
@@ -69,10 +63,9 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
                 dev_err(chip->dev,
                         "Recv size(%d) less than available space\n", size);
                 for (; i < size; i++) { /* clear the waiting data anyway */
-                       status = inb(chip->vendor->base + 1);
+                       status = atmel_getb(chip, 1);
                         if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                               dev_err(chip->dev,
-                                       "error reading data\n");
+                               dev_err(chip->dev, "error reading data\n");
                                 return -EIO;
                         }
                 }
@@ -81,17 +74,16 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
  
         /* read all the data available */
         for (; i < size; i++) {
-               status = inb(chip->vendor->base + 1);
+               status = atmel_getb(chip, 1);
                 if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                       dev_err(chip->dev,
-                               "error reading data\n");
+                       dev_err(chip->dev, "error reading data\n");
                         return -EIO;
                 }
-               *buf++ = inb(chip->vendor->base);
+               *buf++ = atmel_getb(chip, 0);
         }
  
         /* make sure data available is gone */
-       status = inb(chip->vendor->base + 1);
+       status = atmel_getb(chip, 1);
         if (status & ATML_STATUS_DATA_AVAIL) {
                 dev_err(chip->dev, "data available is stuck\n");
                 return -EIO;
@@ -100,14 +92,14 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
         return size;
  }
  
-static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count)
+static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count)
  {
         int i;
  
         dev_dbg(chip->dev, "tpm_atml_send:\n");
         for (i = 0; i < count; i++) {
                 dev_dbg(chip->dev, "%d 0x%x(%d)\n",  i, buf[i], buf[i]);
-               outb(buf[i], chip->vendor->base);
+               atmel_putb(buf[i], chip, 0);
         }
  
         return count;
@@ -115,12 +107,12 @@ static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count)
  
  static void tpm_atml_cancel(struct tpm_chip *chip)
  {
-       outb(ATML_STATUS_ABORT, chip->vendor->base + 1);
+       atmel_putb(ATML_STATUS_ABORT, chip, 1);
  }
  
  static u8 tpm_atml_status(struct tpm_chip *chip)
  {
-       return inb(chip->vendor->base + 1);
+       return atmel_getb(chip, 1);
  }
  
  static struct file_operations atmel_ops = {
@@ -142,7 +134,7 @@ static struct attribute* atmel_attrs[] = {
         &dev_attr_pcrs.attr,
         &dev_attr_caps.attr,
         &dev_attr_cancel.attr,
-       0,
+       NULL,
  };
  
  static struct attribute_group atmel_attr_grp = { .attrs = atmel_attrs };
@@ -159,27 +151,39 @@ static struct tpm_vendor_specific tpm_atmel = {
         .miscdev = { .fops = &atmel_ops, },
  };
  
-static int __devinit tpm_atml_init(struct pci_dev *pci_dev,
-                                  const struct pci_device_id *pci_id)
+static struct platform_device *pdev;
+
+static void atml_plat_remove(void)
  {
-       u8 version[4];
-       int rc = 0;
-       int lo, hi;
+       struct tpm_chip *chip = dev_get_drvdata(&pdev->dev);
  
-       if (pci_enable_device(pci_dev))
-               return -EIO;
+       if (chip) {
+               if (chip->vendor->have_region)
+                       atmel_release_region(chip->vendor->base,
+                                            chip->vendor->region_size);
+               atmel_put_base_addr(chip->vendor);
+               tpm_remove_hardware(chip->dev);
+               platform_device_unregister(pdev);
+       }
+}
+
+static struct device_driver atml_drv = {
+       .name = "tpm_atmel",
+       .bus = &platform_bus_type,
+       .owner = THIS_MODULE,
+       .suspend = tpm_pm_suspend,
+       .resume = tpm_pm_resume,
+};
  
-       lo = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_LO);
-       hi = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_HI);
+static int __init init_atmel(void)
+{
+       int rc = 0;
  
-       tpm_atmel.base = (hi<<8)|lo;
-       dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base);
+       driver_register(&atml_drv);
  
-       /* verify that it is an Atmel part */
-       if (tpm_read_index(TPM_ADDR, 4) != 'A' || tpm_read_index(TPM_ADDR, 5) != 'T'
-           || tpm_read_index(TPM_ADDR, 6) != 'M' || tpm_read_index(TPM_ADDR, 7) != 'L') {
+       if ((tpm_atmel.iobase = atmel_get_base_addr(&tpm_atmel)) == NULL) {
                 rc = -ENODEV;
-               goto out_err;
+               goto err_unreg_drv;
         }
  
         /* query chip for its version number */
diff --git a/linux-2.6-xen-sparse/drivers/char/tty_io.c b/linux-2.6-xen-sparse/drivers/char/tty_io.c

index 20712bfdf458adf386058a017ea05c66b7f53b03..c9c9085204d674258d29662b0042c75b51fa9c7e 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c
@@ -811,7 +811,7 @@ static void do_tty_hangup(void *data)
         check_tty_count(tty, "do_tty_hangup");
         file_list_lock();
         /* This breaks for file handles being sent over AF_UNIX sockets ? */
-       list_for_each_entry(filp, &tty->tty_files, f_list) {
+       list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
                 if (filp->f_op->write == redirected_tty_write)
                         cons_filp = filp;
                 if (filp->f_op->write != tty_write)
@@ -1418,14 +1418,11 @@ end_init:
  
         /* Release locally allocated memory ... nothing placed in slots */
  free_mem_out:
-       if (o_tp)
-               kfree(o_tp);
+       kfree(o_tp);
         if (o_tty)
                 free_tty_struct(o_tty);
-       if (ltp)
-               kfree(ltp);
-       if (tp)
-               kfree(tp);
+       kfree(ltp);
+       kfree(tp);
         free_tty_struct(tty);
  
  fail_no_mem:
@@ -2730,7 +2727,7 @@ void tty_register_device(struct tty_driver *driver, unsigned index,
                 pty_line_name(driver, index, name);
         else
                 tty_line_name(driver, index, name);
-       class_device_create(tty_class, dev, device, name);
+       class_device_create(tty_class, NULL, dev, device, "%s", name);
  }
  
  /**
@@ -2985,14 +2982,14 @@ static int __init tty_init(void)
             register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
                 panic("Couldn't register /dev/tty driver\n");
         devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 0), S_IFCHR|S_IRUGO|S_IWUGO, "tty");
-       class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
+       class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
  
         cdev_init(&console_cdev, &console_fops);
         if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
             register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)
                 panic("Couldn't register /dev/console driver\n");
         devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 1), S_IFCHR|S_IRUSR|S_IWUSR, "console");
-       class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 1), NULL, "console");
+       class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL, "console");
  
  #ifdef CONFIG_UNIX98_PTYS
         cdev_init(&ptmx_cdev, &ptmx_fops);
@@ -3000,7 +2997,7 @@ static int __init tty_init(void)
             register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0)
                 panic("Couldn't register /dev/ptmx driver\n");
         devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 2), S_IFCHR|S_IRUGO|S_IWUGO, "ptmx");
-       class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
+       class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
  #endif
  
  #ifdef CONFIG_VT
@@ -3011,7 +3008,7 @@ static int __init tty_init(void)
             register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0)
                 panic("Couldn't register /dev/tty0 driver\n");
         devfs_mk_cdev(MKDEV(TTY_MAJOR, 0), S_IFCHR|S_IRUSR|S_IWUSR, "vc/0");
-       class_device_create(tty_class, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
+       class_device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
  
         vty_init();
   out_vt:
diff --git a/linux-2.6-xen-sparse/drivers/firmware/Kconfig b/linux-2.6-xen-sparse/drivers/firmware/Kconfig

index b4e2b840c1fe231be9d47f76d0dd24a508dd16b3..f0dff5ac12e9a3288b9b773131eccc27385f3685 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/firmware/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/firmware/Kconfig
@@ -60,6 +60,7 @@ config EFI_PCDP
  
  config DELL_RBU
         tristate "BIOS update support for DELL systems via sysfs"
+       depends on X86
         select FW_LOADER
         help
          Say m if you want to have the option of updating the BIOS for your
@@ -70,8 +71,7 @@ config DELL_RBU
  
  config DCDBAS
         tristate "Dell Systems Management Base Driver"
-       depends on X86 || X86_64
-       default m
+       depends on X86
         help
           The Dell Systems Management Base Driver provides a sysfs interface
           for systems management software to perform System Management
diff --git a/linux-2.6-xen-sparse/drivers/serial/Kconfig b/linux-2.6-xen-sparse/drivers/serial/Kconfig

index 0fc073557ecc938b4ebe3cc66a2d627f16951ed2..c6b8929988529931a13e423442399b647f5fcf24 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/serial/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig
@@ -10,7 +10,8 @@ menu "Serial drivers"
  # The new 8250/16550 serial drivers
  config SERIAL_8250
         tristate "8250/16550 and compatible serial support"
-       depends on (BROKEN || !(SPARC64 || SPARC32 || XEN_DISABLE_SERIAL))
+       depends on (BROKEN || !SPARC)
+       depends on !XEN_DISABLE_SERIAL
         select SERIAL_CORE
         ---help---
           This selects whether you want to include the driver for the standard
@@ -207,6 +208,14 @@ config SERIAL_8250_ACORN
           system, say Y to this option.  The driver can handle 1, 2, or 3 port
           cards.  If unsure, say N.
  
+config SERIAL_8250_AU1X00
+       bool "AU1X00 serial port support"
+       depends on SERIAL_8250 != n && SOC_AU1X00
+       help
+         If you have an Au1x00 board and want to use the serial port, say Y
+         to this option.  The driver can handle 1 or 2 serial ports.
+         If unsure, say N.
+
  comment "Non-8250 serial port support"
  
  config SERIAL_AMBA_PL010
@@ -461,14 +470,14 @@ config SERIAL_IMX_CONSOLE
  
  config SERIAL_SUNCORE
         bool
-       depends on SPARC32 || SPARC64
+       depends on SPARC
         select SERIAL_CORE
         select SERIAL_CORE_CONSOLE
         default y
  
  config SERIAL_SUNZILOG
         tristate "Sun Zilog8530 serial support"
-       depends on SPARC32 || SPARC64
+       depends on SPARC
         help
           This driver supports the Zilog8530 serial ports found on many Sparc
           systems.  Say Y or M if you want to be able to these serial ports.
@@ -483,7 +492,7 @@ config SERIAL_SUNZILOG_CONSOLE
  
  config SERIAL_SUNSU
         tristate "Sun SU serial support"
-       depends on (SPARC32 || SPARC64) && PCI
+       depends on SPARC && PCI
         help
           This driver supports the 8250 serial ports that run the keyboard and
           mouse on (PCI) UltraSPARC systems.  Say Y or M if you want to be able
@@ -499,7 +508,7 @@ config SERIAL_SUNSU_CONSOLE
  
  config SERIAL_MUX
         tristate "Serial MUX support"
-       depends on PARISC
+       depends on GSC
         select SERIAL_CORE
         default y
         ---help---
@@ -539,7 +548,7 @@ config PDC_CONSOLE
  
  config SERIAL_SUNSAB
         tristate "Sun Siemens SAB82532 serial support"
-       depends on (SPARC32 || SPARC64) && PCI
+       depends on SPARC && PCI
         help
           This driver supports the Siemens SAB82532 DUSCC serial ports on newer
           (PCI) UltraSPARC systems.  Say Y or M if you want to be able to these
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c

index 28f07388f1fd00e7624ad2601c30324d18cae563..e4e7e5cad415c124b6d37d63a73e01fee843ff69 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
@@ -92,14 +92,14 @@ static DECLARE_WORK(balloon_worker, balloon_process, NULL);
  static struct timer_list balloon_timer;
  
  /* Use the private and mapping fields of struct page as a list. */
-#define PAGE_TO_LIST(p) ((struct list_head *)&p->private)
+#define PAGE_TO_LIST(p) ((struct list_head *)&p->u.private)
  #define LIST_TO_PAGE(l)                                \
-       (list_entry(((unsigned long *)l), struct page, private))
+       (list_entry(((unsigned long *)l), struct page, u.private))
  #define UNLIST_PAGE(p)                         \
         do {                                    \
                 list_del(PAGE_TO_LIST(p));      \
                 p->mapping = NULL;              \
-               p->private = 0;                 \
+               p->u.private = 0;               \
         } while(0)
  
  #define IPRINTK(fmt, args...) \
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c

index 994a7739e72129be82298d3e514be199101b3348..eddc134506a53dd121a33b907a0802ddb86dcc20 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
@@ -126,8 +126,10 @@ static void xen_smp_intr_exit(unsigned int cpu)
  
  static void cpu_bringup(void)
  {
-       if (!cpu_isset(smp_processor_id(), cpu_initialized))
+       if (!cpu_isset(smp_processor_id(), cpu_initialized)) {
                 cpu_init();
+               preempt_disable();
+       }
         local_irq_enable();
         cpu_idle();
  }
diff --git a/linux-2.6-xen-sparse/fs/Kconfig b/linux-2.6-xen-sparse/fs/Kconfig

index 14f7e56a1f1d4a5f0ba7171312e88f3a94dbdaf9..425f883b1f576543da4131f9b0b5c93e39dc94fe 100644 (file)
--- a/linux-2.6-xen-sparse/fs/Kconfig
+++ b/linux-2.6-xen-sparse/fs/Kconfig
@@ -810,7 +810,7 @@ config TMPFS
  
  config HUGETLBFS
         bool "HugeTLB file system support"
-       depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN
+       depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
         depends !XEN
  
  config HUGETLB_PAGE
@@ -899,6 +899,7 @@ config AFFS_FS
  config HFS_FS
         tristate "Apple Macintosh file system support (EXPERIMENTAL)"
         depends on EXPERIMENTAL
+       select NLS
         help
           If you say Y here, you will be able to mount Macintosh-formatted
           floppy disks and hard drive partitions with full read-write access.
@@ -1051,6 +1052,19 @@ config JFFS2_FS_WRITEBUFFER
             - NOR flash with transparent ECC
             - DataFlash
  
+config JFFS2_SUMMARY
+       bool "JFFS2 summary support (EXPERIMENTAL)"
+       depends on JFFS2_FS && EXPERIMENTAL
+       default n
+       help
+         This feature makes it possible to use summary information
+         for faster filesystem mount.
+
+         The summary information can be inserted into a filesystem image
+         by the utility 'sumtool'.
+
+         If unsure, say 'N'.
+
  config JFFS2_COMPRESSION_OPTIONS
         bool "Advanced compression options for JFFS2"
         depends on JFFS2_FS
@@ -1072,10 +1086,10 @@ config JFFS2_ZLIB
         default y
          help
            Zlib is designed to be a free, general-purpose, legally unencumbered,
-          lossless data-compression library for use on virtually any computer 
+          lossless data-compression library for use on virtually any computer
            hardware and operating system. See <http://www.gzip.org/zlib/> for
            further information.
-          
+
            Say 'Y' if unsure.
  
  config JFFS2_RTIME
@@ -1097,7 +1111,7 @@ choice
          default JFFS2_CMODE_PRIORITY
          depends on JFFS2_FS
          help
-          You can set here the default compression mode of JFFS2 from 
+          You can set here the default compression mode of JFFS2 from
            the available compression modes. Don't touch if unsure.
  
  config JFFS2_CMODE_NONE
@@ -1108,13 +1122,13 @@ config JFFS2_CMODE_NONE
  config JFFS2_CMODE_PRIORITY
          bool "priority"
          help
-          Tries the compressors in a predefinied order and chooses the first 
+          Tries the compressors in a predefinied order and chooses the first
            successful one.
  
  config JFFS2_CMODE_SIZE
          bool "size (EXPERIMENTAL)"
          help
-          Tries all compressors and chooses the one which has the smallest 
+          Tries all compressors and chooses the one which has the smallest
            result.
  
  endchoice
@@ -1588,9 +1602,10 @@ config CIFS
           PC operating systems.  The CIFS protocol is fully supported by 
           file servers such as Windows 2000 (including Windows 2003, NT 4  
           and Windows XP) as well by Samba (which provides excellent CIFS
-         server support for Linux and many other operating systems). Currently
-         you must use the smbfs client filesystem to access older SMB servers
-         such as Windows 9x and OS/2.
+         server support for Linux and many other operating systems). Limited
+         support for Windows ME and similar servers is provided as well. 
+         You must use the smbfs client filesystem to access older SMB servers
+         such as OS/2 and DOS.
  
           The intent of the cifs module is to provide an advanced
           network file system client for mounting to CIFS compliant servers, 
@@ -1601,7 +1616,7 @@ config CIFS
           cifs if running only a (Samba) server. It is possible to enable both
           smbfs and cifs (e.g. if you are using CIFS for accessing Windows 2003
           and Samba 3 servers, and smbfs for accessing old servers). If you need 
-         to mount to Samba or Windows 2003 servers from this machine, say Y.
+         to mount to Samba or Windows from this machine, say Y.
  
  config CIFS_STATS
          bool "CIFS statistics"
@@ -1610,8 +1625,22 @@ config CIFS_STATS
            Enabling this option will cause statistics for each server share
           mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
  
+config CIFS_STATS2
+       bool "CIFS extended statistics"
+       depends on CIFS_STATS
+       help
+         Enabling this option will allow more detailed statistics on SMB
+         request timing to be displayed in /proc/fs/cifs/DebugData and also
+         allow optional logging of slow responses to dmesg (depending on the
+         value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+         These additional statistics may have a minor effect on performance
+         and memory utilization.
+
+         Unless you are a developer or are doing network performance analysis
+         or tuning, say N.
+
  config CIFS_XATTR
-        bool "CIFS extended attributes (EXPERIMENTAL)"
+        bool "CIFS extended attributes"
          depends on CIFS
          help
            Extended attributes are name:value pairs associated with inodes by
@@ -1623,11 +1652,11 @@ config CIFS_XATTR
            prefaced by the user namespace prefix. The system namespace
            (used by some filesystems to store ACLs) is not supported at
            this time.
-                                                                                                    
+
            If unsure, say N.
  
  config CIFS_POSIX
-        bool "CIFS POSIX Extensions (EXPERIMENTAL)"
+        bool "CIFS POSIX Extensions"
          depends on CIFS_XATTR
          help
            Enabling this option will cause the cifs client to attempt to
@@ -1640,10 +1669,28 @@ config CIFS_POSIX
  
  config CIFS_EXPERIMENTAL
           bool "CIFS Experimental Features (EXPERIMENTAL)"
-         depends on CIFS
+         depends on CIFS && EXPERIMENTAL
+         help
+           Enables cifs features under testing. These features are
+           experimental and currently include support for writepages
+           (multipage writebehind performance improvements) and directory
+           change notification ie fcntl(F_DNOTIFY) as well as some security
+           improvements.  Some also depend on setting at runtime the
+           pseudo-file /proc/fs/cifs/Experimental (which is disabled by
+           default). See the file fs/cifs/README for more details.
+
+           If unsure, say N.
+
+config CIFS_UPCALL
+         bool "CIFS Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
+         depends on CIFS_EXPERIMENTAL
+         select CONNECTOR
           help
-           Enables cifs features under testing. These features
-           are highly experimental.  If unsure, say N.
+           Enables an upcall mechanism for CIFS which will be used to contact
+           userspace helper utilities to provide SPNEGO packaged Kerberos
+           tickets which are needed to mount to certain secure servers
+           (for which more secure Kerberos authentication is required). If
+           unsure, say N.
  
  config NCP_FS
         tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/linux-2.6-xen-sparse/include/asm-i386/atomic.h b/linux-2.6-xen-sparse/include/asm-i386/atomic.h

index 04247f3945bdc2808d6048de704cc1a46c01b769..0ee6d4aa76dc86859b5687d6e8270213b8e13db4 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/atomic.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/atomic.h
@@ -210,6 +210,27 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
         return atomic_add_return(-i,v);
  }
  
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)                             \
+({                                                             \
+       int c, old;                                             \
+       c = atomic_read(v);                                     \
+       while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+               c = old;                                        \
+       c != (u);                                               \
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
  #define atomic_inc_return(v)  (atomic_add_return(1,v))
  #define atomic_dec_return(v)  (atomic_sub_return(1,v))
  
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h

index 8ab62e2b96f022af4285d15d8631bef51b11131e..88ea076b1fdfb15671a6f9824e6a8791fa9006c7 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h
@@ -15,6 +15,8 @@
  
  extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
  
+#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
+
  DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
  
  struct Xgt_desc_struct {
@@ -38,8 +40,6 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
  #define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
  #define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
  
-#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
-
  /*
   * This is the ldt that every process will get unless we need
   * something other than this.
@@ -68,8 +68,7 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *ad
  
  static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
  {
-       _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
-           (int)addr, ((size << 3)-1), 0x82);
+       _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
  }
  
  #define LDT_entry_a(info) \
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h

index 26daf9f7f11254fe74904ae7fee6cf62c1fa9a54..e1e8c49410784f470476cc6e8b1c046dce5b923a 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h
@@ -63,7 +63,6 @@ static inline void switch_mm(struct mm_struct *prev,
                 cpu_set(cpu, next->cpu_vm_mask);
  
                 /* Re-load page tables: load_cr3(next->pgd) */
-               per_cpu(cur_pgd, cpu) = next->pgd;
                 op->cmd = MMUEXT_NEW_BASEPTR;
                 op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
                 op++;
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h

index fa02e67ea86b7ba58f06e53032ac2aff7279c987..095580f3a45cfb4d556427e84c65a9fad15c690f 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h
@@ -1,9 +1,8 @@
-#include <linux/config.h>
-
  #ifndef _ASMi386_PARAM_H
  #define _ASMi386_PARAM_H
  
  #ifdef __KERNEL__
+# include <linux/config.h>
  # define HZ            CONFIG_HZ       /* Internal kernel timer frequency */
  # define USER_HZ       100             /* .. some user interfaces are in "ticks" */
  # define CLOCKS_PER_SEC                (USER_HZ)       /* like times() */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h

index ac5f7ace61da6c4aaf544042e30830a99ffa7178..99a57bbd6f40653a16dfdfb04fea83ad4a7b7fc2 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
@@ -76,11 +76,6 @@
  #define pfn_pte_ma(pfn, prot)  __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
  #define pfn_pmd(pfn, prot)     __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
  
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-
-#define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
  /*
   * All present user pages are user-executable:
   */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h

index f4522694c76b5bcdc96934401a4d45132395e729..8c66fc16790c0d56128c45b0bd5845d4c72cbc01 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
@@ -103,11 +103,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
   */
  static inline void pud_clear (pud_t * pud) { }
  
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-
-#define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
  #define pud_page(pud) \
  ((struct page *) __va(pud_val(pud) & PAGE_MASK))
  
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h

index 7e58ca661d77751807c451373e84b39e9778fa1a..3a6f09c1cd5279eb4c3a98d23ffca2bf45f551c5 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
@@ -26,6 +26,9 @@
  #include <linux/list.h>
  #include <linux/spinlock.h>
  
+struct mm_struct;
+struct vm_area_struct;
+
  /*
   * ZERO_PAGE is a global shared page that is always zero: used
   * for zero-mapped memory areas etc..
@@ -204,7 +207,8 @@ extern unsigned long pg0[];
  #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
  #define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
  
-#define pmd_none(x)    (!pmd_val(x))
+/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
+#define pmd_none(x)    (!(unsigned long)pmd_val(x))
  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
     can temporarily clear it. */
  #define pmd_present(x) (pmd_val(x))
@@ -326,8 +330,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
         return pte;
  }
  
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
  #define pmd_large(pmd) \
  ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
  
@@ -372,6 +374,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  #define pte_offset_kernel(dir, address) \
         ((pte_t *) pmd_page_kernel(*(dir)) +  pte_index(address))
  
+#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+
+#define pmd_page_kernel(pmd) \
+               ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
  /*
   * Helper function that returns the kernel pagetable entry controlling
   * the virtual address 'address'. NULL means no pagetable entry present.
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h

index b2a61b32af8c65414014ec49bddccae6bb0552b7..04bd9e74d0ee18a65bcbc996ed8d3911dd91e92a 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h
@@ -66,7 +66,9 @@ struct cpuinfo_x86 {
         int     f00f_bug;
         int     coma_bug;
         unsigned long loops_per_jiffy;
-       unsigned char x86_num_cores;
+       unsigned char x86_max_cores;    /* cpuid returned max cores value */
+       unsigned char booted_cores;     /* number of cores as seen by OS */
+       unsigned char apicid;
  } __attribute__((__aligned__(SMP_CACHE_BYTES)));
  
  #define X86_VENDOR_INTEL 0
@@ -89,7 +91,6 @@ extern struct cpuinfo_x86 boot_cpu_data;
  extern struct cpuinfo_x86 new_cpu_data;
  extern struct tss_struct doublefault_tss;
  DECLARE_PER_CPU(struct tss_struct, init_tss);
-DECLARE_PER_CPU(pgd_t *, cur_pgd);
  
  #ifdef CONFIG_SMP
  extern struct cpuinfo_x86 cpu_data[];
@@ -724,4 +725,10 @@ extern void mtrr_bp_init(void);
  #define mtrr_bp_init() do {} while (0)
  #endif
  
+#ifdef CONFIG_X86_MCE
+extern void mcheck_init(struct cpuinfo_x86 *c);
+#else
+#define mcheck_init(c) do {} while(0)
+#endif
+
  #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h

index e246c92979a2965122fbd5890deca199bf5421b2..b29479ca9b3b82f9e64afbdd42a1b629d9f7aefd 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h
@@ -45,6 +45,8 @@ extern void unlock_ipi_call_lock(void);
  #define MAX_APICID 256
  extern u8 x86_cpu_to_apicid[];
  
+#define cpu_physical_id(cpu)   x86_cpu_to_apicid[cpu]
+
  #ifdef CONFIG_HOTPLUG_CPU
  extern void cpu_exit_clear(void);
  extern void cpu_uninit(void);
@@ -91,6 +93,10 @@ extern int __cpu_disable(void);
  extern void __cpu_die(unsigned int cpu);
  #endif /* !__ASSEMBLY__ */
  
+#else /* CONFIG_SMP */
+
+#define cpu_physical_id(cpu)           boot_cpu_physical_apicid
+
  #define NO_PROC_ID             0xFF            /* No processor magic marker */
  
  #endif
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h

index 228d664ce04cf1445edb99adc01838a4f63eea1c..9be3d06efbfebb4121b2d8949aa14e2bfbdad9dd 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
@@ -122,21 +122,22 @@ static inline unsigned long _get_base(char * addr)
  #define write_cr0(x) \
         __asm__ __volatile__("movl %0,%%cr0": :"r" (x));
  
-#define read_cr2() ({ \
-       unsigned int __dummy; \
-       __asm__ __volatile__( \
-               "movl %%cr2,%0\n\t" \
-               :"=r" (__dummy)); \
-       __dummy; \
-})
+#define read_cr2() \
+       (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
  #define write_cr2(x) \
         __asm__ __volatile__("movl %0,%%cr2": :"r" (x));
  
-#define read_cr3() per_cpu(cur_pgd, smp_processor_id())
-#define write_cr3(x) do {                              \
-       xen_pt_switch((x));                             \
-       per_cpu(cur_pgd, smp_processor_id()) = (x);     \
-} while (/* CONSTCOND */0)
+#define read_cr3() ({ \
+       unsigned int __dummy; \
+       __asm__ ( \
+               "movl %%cr3,%0\n\t" \
+               :"=r" (__dummy)); \
+       machine_to_phys(__dummy); \
+})
+#define write_cr3(x) ({                                                \
+       maddr_t __dummy = phys_to_machine(x);                   \
+       __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy));  \
+})
  
  #define read_cr4() ({ \
         unsigned int __dummy; \
@@ -147,7 +148,6 @@ static inline unsigned long _get_base(char * addr)
  })
  #define write_cr4(x) \
         __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
-
  #define stts() (HYPERVISOR_fpu_taskswitch(1))
  
  #endif /* __KERNEL__ */
@@ -173,6 +173,8 @@ struct __xchg_dummy { unsigned long a[100]; };
  #define __xg(x) ((struct __xchg_dummy *)(x))
  
  
+#ifdef CONFIG_X86_CMPXCHG64
+
  /*
   * The semantics of XCHGCMP8B are a bit strange, this is why
   * there is a loop and the loading of %%eax and %%edx has to
@@ -227,6 +229,8 @@ static inline void __set_64bit_var (unsigned long long *ptr,
   __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
   __set_64bit(ptr, ll_low(value), ll_high(value)) )
  
+#endif
+
  /*
   * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
   * Note 2: xchg has side effect, so that attribute volatile is necessary,
@@ -265,6 +269,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
  
  #ifdef CONFIG_X86_CMPXCHG
  #define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
  #endif
  
  static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -281,22 +288,78 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
         case 2:
                 __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
                                      : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                      : "memory");
                 return prev;
         case 4:
                 __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
                                      : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                      : "memory");
                 return prev;
         }
         return old;
  }
  
-#define cmpxchg(ptr,o,n)\
-       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-                                       (unsigned long)(n),sizeof(*(ptr))))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       switch (size) {
+       case 1:
+               return cmpxchg_386_u8(ptr, old, new);
+       case 2:
+               return cmpxchg_386_u16(ptr, old, new);
+       case 4:
+               return cmpxchg_386_u32(ptr, old, new);
+       }
+       return old;
+}
+
+#define cmpxchg(ptr,o,n)                                               \
+({                                                                     \
+       __typeof__(*(ptr)) __ret;                                       \
+       if (likely(boot_cpu_data.x86 > 3))                              \
+               __ret = __cmpxchg((ptr), (unsigned long)(o),            \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       else                                                            \
+               __ret = cmpxchg_386((ptr), (unsigned long)(o),          \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       __ret;                                                          \
+})
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+
+static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
+                                     unsigned long long new)
+{
+       unsigned long long prev;
+       __asm__ __volatile__(LOCK "cmpxchg8b %3"
+                            : "=A"(prev)
+                            : "b"((unsigned long)new),
+                              "c"((unsigned long)(new >> 32)),
+                              "m"(*__xg(ptr)),
+                              "0"(old)
+                            : "memory");
+       return prev;
+}
+
+#define cmpxchg64(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
+                                       (unsigned long long)(n)))
+
+#endif
      
  #ifdef __KERNEL__
  struct alt_instr { 
diff --git a/linux-2.6-xen-sparse/include/asm-i386/rwsem.h b/linux-2.6-xen-sparse/include/asm-i386/rwsem.h

index 8ee70a1a3104fe9e59153bdf41972ce0797d1d9b..75751b7fcee2c4efb6afab367e9df025cd48ea97 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/rwsem.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/rwsem.h
@@ -285,5 +285,10 @@ LOCK                 "xadd %0,(%2)"
         return tmp+delta;
  }
  
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+       return (sem->count != 0);
+}
+
  #endif /* __KERNEL__ */
  #endif /* _I386_RWSEM_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/system.h b/linux-2.6-xen-sparse/include/asm-i386/system.h

index 398061d4714d5c241a0ef1f64c9fa5f24ea5454c..f27a87b2146990c52ae748b09c9936bab579eb63 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/system.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/system.h
@@ -167,6 +167,8 @@ struct __xchg_dummy { unsigned long a[100]; };
  #define __xg(x) ((struct __xchg_dummy *)(x))
  
  
+#ifdef CONFIG_X86_CMPXCHG64
+
  /*
   * The semantics of XCHGCMP8B are a bit strange, this is why
   * there is a loop and the loading of %%eax and %%edx has to
@@ -221,6 +223,8 @@ static inline void __set_64bit_var (unsigned long long *ptr,
   __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
   __set_64bit(ptr, ll_low(value), ll_high(value)) )
  
+#endif
+
  /*
   * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
   * Note 2: xchg has side effect, so that attribute volatile is necessary,
@@ -259,6 +263,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
  
  #ifdef CONFIG_X86_CMPXCHG
  #define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
  #endif
  
  static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -275,22 +282,78 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
         case 2:
                 __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
                                      : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                      : "memory");
                 return prev;
         case 4:
                 __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
                                      : "=a"(prev)
-                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "r"(new), "m"(*__xg(ptr)), "0"(old)
                                      : "memory");
                 return prev;
         }
         return old;
  }
  
-#define cmpxchg(ptr,o,n)\
-       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-                                       (unsigned long)(n),sizeof(*(ptr))))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable of running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       switch (size) {
+       case 1:
+               return cmpxchg_386_u8(ptr, old, new);
+       case 2:
+               return cmpxchg_386_u16(ptr, old, new);
+       case 4:
+               return cmpxchg_386_u32(ptr, old, new);
+       }
+       return old;
+}
+
+#define cmpxchg(ptr,o,n)                                               \
+({                                                                     \
+       __typeof__(*(ptr)) __ret;                                       \
+       if (likely(boot_cpu_data.x86 > 3))                              \
+               __ret = __cmpxchg((ptr), (unsigned long)(o),            \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       else                                                            \
+               __ret = cmpxchg_386((ptr), (unsigned long)(o),          \
+                                       (unsigned long)(n), sizeof(*(ptr))); \
+       __ret;                                                          \
+})
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+
+static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
+                                     unsigned long long new)
+{
+       unsigned long long prev;
+       __asm__ __volatile__(LOCK "cmpxchg8b %3"
+                            : "=A"(prev)
+                            : "b"((unsigned long)new),
+                              "c"((unsigned long)(new >> 32)),
+                              "m"(*__xg(ptr)),
+                              "0"(old)
+                            : "memory");
+       return prev;
+}
+
+#define cmpxchg64(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
+                                       (unsigned long long)(n)))
+
+#endif
      
  #ifdef __KERNEL__
  struct alt_instr { 
diff --git a/linux-2.6-xen-sparse/include/asm-um/page.h b/linux-2.6-xen-sparse/include/asm-um/page.h

index 5ce96d770830f9e5eec26e1256e745a35975dc5a..ee9bac86b745b491f0ffdd2e840815d346cdfb69 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-um/page.h
+++ b/linux-2.6-xen-sparse/include/asm-um/page.h
@@ -115,7 +115,7 @@ extern unsigned long uml_physmem;
  #define pfn_valid(pfn) ((pfn) < max_mapnr)
  #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
  
-extern struct page *arch_validate(struct page *page, int mask, int order);
+extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
  #define HAVE_ARCH_VALIDATE
  
  extern int arch_free_page(struct page *page, int order);
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h

index c821d76e8428bf3d3bb729663e05ce6d1aa57e73..c9a5afb71117ef69e0591e1c3d8a558fb61885c3 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h
@@ -114,16 +114,19 @@ static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsig
  
  static inline void set_intr_gate(int nr, void *func) 
  { 
+       BUG_ON((unsigned)nr > 0xFF);
         _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); 
  } 
  
  static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 
  { 
+       BUG_ON((unsigned)nr > 0xFF);
         _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); 
  } 
  
  static inline void set_system_gate(int nr, void *func) 
  { 
+       BUG_ON((unsigned)nr > 0xFF);
         _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
  } 
  
@@ -145,10 +148,16 @@ static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned
  
  static inline void set_tss_desc(unsigned cpu, void *addr)
  { 
+       /*
+        * The sizeof(unsigned long) term comes from an extra "long" at the
+        * end of the iobitmap. See the tss_struct definition in processor.h.
+        *
+        * Why -1? The seg base+limit should point to the address of the
+        * last valid byte.
+        */
          set_tssldt_descriptor((struct ldttss_desc *)&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS], 
-                              (unsigned long)addr, 
-                              DESC_TSS,
-                              sizeof(struct tss_struct) - 1);
+               (unsigned long)addr, DESC_TSS,
+               IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
  } 
  
  static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h

index d49e9dae9e3ca84699f16987cd2ec0182153d9bf..871c01b4ce209529632f0059b1aa9c4c5d09b1c9 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h
@@ -56,7 +56,7 @@ struct hw_interrupt_type;
  #define CALL_FUNCTION_VECTOR   0xfc
  #define KDB_VECTOR             0xfb    /* reserved for KDB */
  #define THERMAL_APIC_VECTOR    0xfa
-/* 0xf9 free */
+#define THRESHOLD_APIC_VECTOR   0xf9
  #define INVALIDATE_TLB_VECTOR_END      0xf8
  #define INVALIDATE_TLB_VECTOR_START    0xf0    /* f0-f8 used for TLB flush */
  
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h

index c347fac15c6d91fe28c86cce3dbb00aaa1c9885c..f93f5372e01f187d62f41463c125482b56496069 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
@@ -60,6 +60,12 @@ extern void mm_pin(struct mm_struct *mm);
  extern void mm_unpin(struct mm_struct *mm);
  void mm_pin_all(void);
  
+static inline void load_cr3(pgd_t *pgd)
+{
+       asm volatile("movq %0,%%cr3" :: "r" (phys_to_machine(__pa(pgd))) :
+                    "memory");
+}
+
  static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
                              struct task_struct *tsk)
  {
@@ -79,7 +85,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                 set_bit(cpu, &next->cpu_vm_mask);
  
                 /* load_cr3(next->pgd) */
-               per_cpu(cur_pgd, smp_processor_id()) = next->pgd;
                 op->cmd = MMUEXT_NEW_BASEPTR;
                 op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
                 op++;
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h

index d9602b55cac7f253e3ad5a3cd1bb4e6d072f8b29..1b704f20859865708aba1f8279bf4dd3b0a02f04 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
@@ -31,7 +31,7 @@
  #define PAGE_SIZE      (1UL << PAGE_SHIFT)
  #endif
  #define PAGE_MASK      (~(PAGE_SIZE-1))
-#define PHYSICAL_PAGE_MASK     (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT))
+#define PHYSICAL_PAGE_MASK     (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
  
  #define THREAD_ORDER 1 
  #ifdef __ASSEMBLY__
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h

index 035dfe07e6236fce30dd5a3dbf6b4ddae1f364f9..8d29464ddd0b4e8ecba363fb7daaf9863fbea9c3 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h
@@ -1,9 +1,8 @@
-#include <linux/config.h>
-
  #ifndef _ASMx86_64_PARAM_H
  #define _ASMx86_64_PARAM_H
  
  #ifdef __KERNEL__
+# include <linux/config.h>
  # define HZ            CONFIG_HZ       /* Internal kernel timer frequency */
  # define USER_HZ       100             /* .. some user interfaces are in "ticks */
  # define CLOCKS_PER_SEC                (USER_HZ)       /* like times() */
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h

index f03452a9160ac5f317028fd83b18ff108bde038b..f2e59f683876f3957a3ac6f3ddb639a8ebc5deed 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
@@ -39,6 +39,7 @@ extern pud_t level3_physmem_pgt[512];
  extern pud_t level3_ident_pgt[512];
  extern pmd_t level2_kernel_pgt[512];
  extern pgd_t init_level4_pgt[];
+extern pgd_t boot_level4_pgt[];
  extern unsigned long __supported_pte_mask;
  
  #define swapper_pg_dir init_level4_pgt
@@ -143,6 +144,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
  }
  #endif
  
+struct mm_struct;
+
  static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
  {
         pte_t pte;
@@ -405,8 +408,6 @@ static inline int pmd_large(pmd_t pte) {
   * and a page entry and page directory to the page they refer to.
   */
  
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
  /*
   * Level 4 access.
   * Never use these in the common code.
@@ -450,7 +451,7 @@ static inline pud_t *pud_offset_k(unsigned long address)
  #define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
  #define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
  #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
-#define pmd_pfn(x)  ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+#define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
  
  #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
  #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h

index 709073972364ecfa34ae47354b0f371d08020c6b..34722412e4ef5a626043ca46d36ce8772484b212 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
@@ -61,10 +61,12 @@ struct cpuinfo_x86 {
         int     x86_cache_alignment;
         int     x86_tlbsize;    /* number of 4K pages in DTLB/ITLB combined(in pages)*/
          __u8    x86_virt_bits, x86_phys_bits;
-       __u8    x86_num_cores;
+       __u8    x86_max_cores;  /* cpuid returned max cores value */
          __u32   x86_power;     
         __u32   extended_cpuid_level;   /* Max extended CPUID function supported */
         unsigned long loops_per_jiffy;
+       __u8    apicid;
+       __u8    booted_cores;   /* number of cores as seen by OS */
  } ____cacheline_aligned;
  
  #define X86_VENDOR_INTEL 0
@@ -168,11 +170,6 @@ static inline void clear_in_cr4 (unsigned long mask)
  }
  
  
-#define load_cr3(pgdir) do {                           \
-       xen_pt_switch(__pa(pgdir));                     \
-       per_cpu(cur_pgd, smp_processor_id()) = pgdir;   \
-} while (/* CONSTCOND */0)
-
  /*
   * Bus types
   */
@@ -247,7 +244,6 @@ struct tss_struct {
  
  extern struct cpuinfo_x86 boot_cpu_data;
  DECLARE_PER_CPU(struct tss_struct,init_tss);
-DECLARE_PER_CPU(pgd_t *, cur_pgd);
  
  #define ARCH_MIN_TASKALIGN     16
  
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h

index 6eef509e207ee52c309bbc0847d4233e71716c88..8f33a1d9e4f02477bad30a964600cdf1aadc55b6 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h
@@ -46,7 +46,6 @@ extern void lock_ipi_call_lock(void);
  extern void unlock_ipi_call_lock(void);
  extern int smp_num_siblings;
  extern void smp_send_reschedule(int cpu);
-extern void zap_low_mappings(void);
  void smp_stop_cpu(void);
  extern int smp_call_function_single(int cpuid, void (*func) (void *info),
                                 void *info, int retry, int wait);
@@ -83,6 +82,8 @@ extern int safe_smp_processor_id(void);
  extern int __cpu_disable(void);
  extern void __cpu_die(unsigned int cpu);
  extern void prefill_possible_map(void);
+extern unsigned num_processors;
+extern unsigned disabled_cpus;
  
  #endif /* !ASSEMBLY */
  
@@ -139,5 +140,11 @@ static __inline int logical_smp_processor_id(void)
  #endif
  #endif
  
+#ifdef CONFIG_SMP
+#define cpu_physical_id(cpu)           x86_cpu_to_apicid[cpu]
+#else
+#define cpu_physical_id(cpu)           boot_cpu_id
+#endif
+
  #endif
  
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h

index 352edd6a2e3dd68cc6bdb554bc45d62e25741479..66432b201d985ca85f31c00de647ddd4d930b493 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h
@@ -10,6 +10,12 @@
  
  #ifdef __KERNEL__
  
+#ifdef CONFIG_SMP
+#define __vcpu_id smp_processor_id()
+#else
+#define __vcpu_id 0
+#endif
+
  #ifdef CONFIG_SMP
  #define LOCK_PREFIX "lock ; "
  #else
@@ -325,7 +331,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
  do {                                                                   \
         vcpu_info_t *_vcpu;                                             \
         preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
         _vcpu->evtchn_upcall_mask = 1;                                  \
         preempt_enable_no_resched();                                    \
         barrier();                                                      \
@@ -336,7 +342,7 @@ do {                                                                        \
         vcpu_info_t *_vcpu;                                             \
         barrier();                                                      \
         preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
         _vcpu->evtchn_upcall_mask = 0;                                  \
         barrier(); /* unmask then check (avoid races) */                \
         if ( unlikely(_vcpu->evtchn_upcall_pending) )                   \
@@ -348,7 +354,7 @@ do {                                                                        \
  do {                                                                   \
         vcpu_info_t *_vcpu;                                             \
         preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
         (x) = _vcpu->evtchn_upcall_mask;                                \
         preempt_enable();                                               \
  } while (0)
@@ -358,7 +364,7 @@ do {                                                                        \
         vcpu_info_t *_vcpu;                                             \
         barrier();                                                      \
         preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
         if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {                   \
                 barrier(); /* unmask then check (avoid races) */        \
                 if ( unlikely(_vcpu->evtchn_upcall_pending) )           \
@@ -374,7 +380,7 @@ do {                                                                        \
  do {                                                                   \
         vcpu_info_t *_vcpu;                                             \
         preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
         (x) = _vcpu->evtchn_upcall_mask;                                \
         _vcpu->evtchn_upcall_mask = 1;                                  \
         preempt_enable_no_resched();                                    \
@@ -394,7 +400,7 @@ void cpu_idle_wait(void);
  ({     int ___x;                                                       \
         vcpu_info_t *_vcpu;                                             \
         preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id];          \
         ___x = (_vcpu->evtchn_upcall_mask != 0);                        \
         preempt_enable_no_resched();                                    \
         ___x; })
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/io_ports.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/io_ports.h

deleted file mode 100644 (file)

index a96d9f6..0000000
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/io_ports.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- *  arch/i386/mach-generic/io_ports.h
- *
- *  Machine specific IO port address definition for generic.
- *  Written by Osamu Tomita <tomita@cinet.co.jp>
- */
-#ifndef _MACH_IO_PORTS_H
-#define _MACH_IO_PORTS_H
-
-/* i8253A PIT registers */
-#define PIT_MODE               0x43
-#define PIT_CH0                        0x40
-#define PIT_CH2                        0x42
-
-/* i8259A PIC registers */
-#define PIC_MASTER_CMD         0x20
-#define PIC_MASTER_IMR         0x21
-#define PIC_MASTER_ISR         PIC_MASTER_CMD
-#define PIC_MASTER_POLL                PIC_MASTER_ISR
-#define PIC_MASTER_OCW3                PIC_MASTER_ISR
-#define PIC_SLAVE_CMD          0xa0
-#define PIC_SLAVE_IMR          0xa1
-
-/* i8259A PIC related value */
-#define PIC_CASCADE_IR         2
-#define MASTER_ICW4_DEFAULT    0x01
-#define SLAVE_ICW4_DEFAULT     0x01
-#define PIC_ICW4_AEOI          2
-
-#endif /* !_MACH_IO_PORTS_H */
diff --git a/linux-2.6-xen-sparse/include/linux/gfp.h b/linux-2.6-xen-sparse/include/linux/gfp.h

index 27e9f337f215ce07c0e5d4d64c8c193aad85ed07..ea28b98cbafb7d88c1a55ced388553fa03aecfd9 100644 (file)
--- a/linux-2.6-xen-sparse/include/linux/gfp.h
+++ b/linux-2.6-xen-sparse/include/linux/gfp.h
@@ -11,9 +11,16 @@ struct vm_area_struct;
  /*
   * GFP bitmasks..
   */
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA      0x01u
-#define __GFP_HIGHMEM  0x02u
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
+#define __GFP_DMA      ((__force gfp_t)0x01u)
+#define __GFP_HIGHMEM  ((__force gfp_t)0x02u)
+#ifdef CONFIG_DMA_IS_DMA32
+#define __GFP_DMA32    ((__force gfp_t)0x01)   /* ZONE_DMA is ZONE_DMA32 */
+#elif BITS_PER_LONG < 64
+#define __GFP_DMA32    ((__force gfp_t)0x00)   /* ZONE_NORMAL is ZONE_DMA32 */
+#else
+#define __GFP_DMA32    ((__force gfp_t)0x04)   /* Has own ZONE_DMA32 */
+#endif
  
  /*
   * Action modifiers - doesn't change the zoning
@@ -26,30 +33,29 @@ struct vm_area_struct;
   *
   * __GFP_NORETRY: The VM implementation must not retry indefinitely.
   */
-#define __GFP_WAIT     0x10u   /* Can wait and reschedule? */
-#define __GFP_HIGH     0x20u   /* Should access emergency pools? */
-#define __GFP_IO       0x40u   /* Can start physical IO? */
-#define __GFP_FS       0x80u   /* Can call down to low-level FS? */
-#define __GFP_COLD     0x100u  /* Cache-cold page required */
-#define __GFP_NOWARN   0x200u  /* Suppress page allocation failure warning */
-#define __GFP_REPEAT   0x400u  /* Retry the allocation.  Might fail */
-#define __GFP_NOFAIL   0x800u  /* Retry for ever.  Cannot fail */
-#define __GFP_NORETRY  0x1000u /* Do not retry.  Might fail */
-#define __GFP_NO_GROW  0x2000u /* Slab internal usage */
-#define __GFP_COMP     0x4000u /* Add compound page metadata */
-#define __GFP_ZERO     0x8000u /* Return zeroed page on success */
-#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
-#define __GFP_NORECLAIM  0x20000u /* No realy zone reclaim during allocation */
-#define __GFP_HARDWALL   0x40000u /* Enforce hardwall cpuset memory allocs */
+#define __GFP_WAIT     ((__force gfp_t)0x10u)  /* Can wait and reschedule? */
+#define __GFP_HIGH     ((__force gfp_t)0x20u)  /* Should access emergency pools? */
+#define __GFP_IO       ((__force gfp_t)0x40u)  /* Can start physical IO? */
+#define __GFP_FS       ((__force gfp_t)0x80u)  /* Can call down to low-level FS? */
+#define __GFP_COLD     ((__force gfp_t)0x100u) /* Cache-cold page required */
+#define __GFP_NOWARN   ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */
+#define __GFP_REPEAT   ((__force gfp_t)0x400u) /* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL   ((__force gfp_t)0x800u) /* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY  ((__force gfp_t)0x1000u)/* Do not retry.  Might fail */
+#define __GFP_NO_GROW  ((__force gfp_t)0x2000u)/* Slab internal usage */
+#define __GFP_COMP     ((__force gfp_t)0x4000u)/* Add compound page metadata */
+#define __GFP_ZERO     ((__force gfp_t)0x8000u)/* Return zeroed page on success */
+#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+#define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
  
  #define __GFP_BITS_SHIFT 20    /* Room for 20 __GFP_FOO bits */
-#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
  
  /* if you forget to add the bitmask here kernel will crash, period */
  #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
                         __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
                         __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-                       __GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL)
+                       __GFP_NOMEMALLOC|__GFP_HARDWALL)
  
  #define GFP_ATOMIC     (__GFP_HIGH)
  #define GFP_NOIO       (__GFP_WAIT)
@@ -64,6 +70,16 @@ struct vm_area_struct;
  
  #define GFP_DMA                __GFP_DMA
  
+/* 4GB DMA on some platforms */
+#define GFP_DMA32      __GFP_DMA32
+
+
+static inline int gfp_zone(gfp_t gfp)
+{
+       int zone = GFP_ZONEMASK & (__force int) gfp;
+       BUG_ON(zone >= GFP_ZONETYPES);
+       return zone;
+}
  
  /*
   * There is only one page-allocator function, and two main namespaces to
@@ -98,7 +114,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
                 return NULL;
  
         return __alloc_pages(gfp_mask, order,
-               NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+               NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
  }
  
  #ifdef CONFIG_NUMA
diff --git a/linux-2.6-xen-sparse/include/linux/irq.h b/linux-2.6-xen-sparse/include/linux/irq.h

index 1663ba5853ded514edf3c2042f9f72b1668ba824..d1057f4c6dca6b40e3fbce2acae57efc8f6e3de1 100644 (file)
--- a/linux-2.6-xen-sparse/include/linux/irq.h
+++ b/linux-2.6-xen-sparse/include/linux/irq.h
@@ -10,6 +10,7 @@
   */
  
  #include <linux/config.h>
+#include <linux/smp.h>
  
  #if !defined(CONFIG_ARCH_S390)
  
diff --git a/linux-2.6-xen-sparse/include/linux/mm.h b/linux-2.6-xen-sparse/include/linux/mm.h

index 2aa53d7bc2266a1a394216d5456c3691dd2b87a4..af29f6905dee0c1e4ded8437eb5bb99ee03fe57d 100644 (file)
--- a/linux-2.6-xen-sparse/include/linux/mm.h
+++ b/linux-2.6-xen-sparse/include/linux/mm.h
@@ -144,7 +144,8 @@ extern unsigned int kobjsize(const void *objp);
  
  #define VM_GROWSDOWN   0x00000100      /* general info on the segment */
  #define VM_GROWSUP     0x00000200
-#define VM_SHM         0x00000400      /* shared memory area, don't swap out */
+#define VM_SHM         0x00000000      /* Means nothing: delete it later */
+#define VM_PFNMAP      0x00000400      /* Page-ranges managed without "struct page", just pure PFN */
  #define VM_DENYWRITE   0x00000800      /* ETXTBSY on write attempts.. */
  
  #define VM_EXECUTABLE  0x00001000
@@ -157,13 +158,14 @@ extern unsigned int kobjsize(const void *objp);
  
  #define VM_DONTCOPY    0x00020000      /* Do not copy this vma on fork */
  #define VM_DONTEXPAND  0x00040000      /* Cannot expand with mremap() */
-#define VM_RESERVED    0x00080000      /* Don't unmap it from swap_out */
+#define VM_RESERVED    0x00080000      /* Count as reserved_vm like IO */
  #define VM_ACCOUNT     0x00100000      /* Is a VM accounted object */
  #define VM_HUGETLB     0x00400000      /* Huge TLB Page VM */
  #define VM_NONLINEAR   0x00800000      /* Is non-linear (remap_file_pages) */
  #define VM_MAPPED_COPY 0x01000000      /* T if mapped copy of data (nommu mmap) */
+#define VM_INSERTPAGE  0x02000000      /* The vma has had "vm_insert_page()" done on it */
  #ifdef CONFIG_XEN
-#define VM_FOREIGN     0x02000000      /* Has pages belonging to another VM */
+#define VM_FOREIGN     0x04000000      /* Has pages belonging to another VM */
  #endif
  
  #ifndef VM_STACK_DEFAULT_FLAGS         /* arch can override this */
@@ -209,12 +211,6 @@ struct vm_operations_struct {
  struct mmu_gather;
  struct inode;
  
-#ifdef ARCH_HAS_ATOMIC_UNSIGNED
-typedef unsigned page_flags_t;
-#else
-typedef unsigned long page_flags_t;
-#endif
-
  /*
   * Each physical page in the system has a struct page associated with
   * it to keep track of whatever it is we are using the page for at the
@@ -222,20 +218,25 @@ typedef unsigned long page_flags_t;
   * a page.
   */
  struct page {
-       page_flags_t flags;             /* Atomic flags, some possibly
+       unsigned long flags;            /* Atomic flags, some possibly
                                          * updated asynchronously */
         atomic_t _count;                /* Usage count, see below. */
         atomic_t _mapcount;             /* Count of ptes mapped in mms,
                                          * to show when page is mapped
                                          * & limit reverse map searches.
                                          */
-       unsigned long private;          /* Mapping-private opaque data:
+       union {
+               unsigned long private;  /* Mapping-private opaque data:
                                          * usually used for buffer_heads
                                          * if PagePrivate set; used for
                                          * swp_entry_t if PageSwapCache
                                          * When page is free, this indicates
                                          * order in the buddy system.
                                          */
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+               spinlock_t ptl;
+#endif
+       } u;
         struct address_space *mapping;  /* If low bit clear, points to
                                          * inode address_space, or NULL.
                                          * If page mapped as anonymous
@@ -263,6 +264,9 @@ struct page {
  #endif /* WANT_PAGE_VIRTUAL */
  };
  
+#define page_private(page)             ((page)->u.private)
+#define set_page_private(page, v)      ((page)->u.private = (v))
+
  /*
   * FIXME: take this include out, include page-flags.h in
   * files which need it (119 of them)
@@ -312,41 +316,22 @@ struct page {
  
  extern void FASTCALL(__page_cache_release(struct page *));
  
-#ifdef CONFIG_HUGETLB_PAGE
-
-static inline int page_count(struct page *p)
+static inline int page_count(struct page *page)
  {
-       if (PageCompound(p))
-               p = (struct page *)p->private;
-       return atomic_read(&(p)->_count) + 1;
+       if (PageCompound(page))
+               page = (struct page *)page_private(page);
+       return atomic_read(&page->_count) + 1;
  }
  
  static inline void get_page(struct page *page)
  {
         if (unlikely(PageCompound(page)))
-               page = (struct page *)page->private;
+               page = (struct page *)page_private(page);
         atomic_inc(&page->_count);
  }
  
  void put_page(struct page *page);
  
-#else          /* CONFIG_HUGETLB_PAGE */
-
-#define page_count(p)          (atomic_read(&(p)->_count) + 1)
-
-static inline void get_page(struct page *page)
-{
-       atomic_inc(&page->_count);
-}
-
-static inline void put_page(struct page *page)
-{
-       if (!PageReserved(page) && put_page_testzero(page))
-               __page_cache_release(page);
-}
-
-#endif         /* CONFIG_HUGETLB_PAGE */
-
  /*
   * Multiple processes may "see" the same page. E.g. for untouched
   * mappings of /dev/null, all processes see the same page full of
@@ -430,7 +415,7 @@ static inline void put_page(struct page *page)
  #endif
  
  /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
-#define SECTIONS_PGOFF         ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)
+#define SECTIONS_PGOFF         ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
  #define NODES_PGOFF            (SECTIONS_PGOFF - NODES_WIDTH)
  #define ZONES_PGOFF            (NODES_PGOFF - ZONES_WIDTH)
  
@@ -590,7 +575,7 @@ static inline int PageAnon(struct page *page)
  static inline pgoff_t page_index(struct page *page)
  {
         if (unlikely(PageSwapCache(page)))
-               return page->private;
+               return page_private(page);
         return page->index;
  }
  
@@ -683,9 +668,10 @@ struct zap_details {
         unsigned long truncate_count;           /* Compare vm_truncate_count */
  };
  
+struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t);
  unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                 unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
                 struct vm_area_struct *start_vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *);
@@ -707,10 +693,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
  }
  
  extern int vmtruncate(struct inode * inode, loff_t offset);
-extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
-extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
  extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
  extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
  extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
@@ -726,6 +708,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
  
  int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
                 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
  
  int __set_page_dirty_buffers(struct page *page);
  int __set_page_dirty_nobuffers(struct page *page);
@@ -750,7 +733,7 @@ extern unsigned long do_mremap(unsigned long addr,
   * The callback will be passed nr_to_scan == 0 when the VM is querying the
   * cache size, so a fastpath for that case is appropriate.
   */
-typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
  
  /*
   * Add an aging callback.  The int is the number of 'seeks' it takes
@@ -762,38 +745,85 @@ struct shrinker;
  extern struct shrinker *set_shrinker(int, shrinker_t);
  extern void remove_shrinker(struct shrinker *shrinker);
  
-/*
- * On a two-level or three-level page table, this ends up being trivial. Thus
- * the inlining and the symmetry break with pte_alloc_map() that does all
- * of this out-of-line.
- */
+extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
+
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+
  /*
   * The following ifdef needed to get the 4level-fixup.h header to work.
   * Remove it when 4level-fixup.h has been removed.
   */
-#ifdef CONFIG_MMU
-#ifndef __ARCH_HAS_4LEVEL_HACK 
+#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
  static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
  {
-       if (pgd_none(*pgd))
-               return __pud_alloc(mm, pgd, address);
-       return pud_offset(pgd, address);
+       return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
+               NULL: pud_offset(pgd, address);
  }
  
  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
  {
-       if (pud_none(*pud))
-               return __pmd_alloc(mm, pud, address);
-       return pmd_offset(pud, address);
+       return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
+               NULL: pmd_offset(pud, address);
  }
-#endif
-#endif /* CONFIG_MMU */
+#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->u.ptl, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page)    &((page)->u.ptl)
+#define pte_lock_init(_page)   do {                                    \
+       spin_lock_init(__pte_lockptr(_page));                           \
+} while (0)
+#define pte_lock_deinit(page)  ((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd)   ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page)    do {} while (0)
+#define pte_lock_deinit(page)  do {} while (0)
+#define pte_lockptr(mm, pmd)   ({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define pte_offset_map_lock(mm, pmd, address, ptlp)    \
+({                                                     \
+       spinlock_t *__ptl = pte_lockptr(mm, pmd);       \
+       pte_t *__pte = pte_offset_map(pmd, address);    \
+       *(ptlp) = __ptl;                                \
+       spin_lock(__ptl);                               \
+       __pte;                                          \
+})
+
+#define pte_unmap_unlock(pte, ptl)     do {            \
+       spin_unlock(ptl);                               \
+       pte_unmap(pte);                                 \
+} while (0)
+
+#define pte_alloc_map(mm, pmd, address)                        \
+       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+               NULL: pte_offset_map(pmd, address))
+
+#define pte_alloc_map_lock(mm, pmd, address, ptlp)     \
+       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+               NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
+
+#define pte_alloc_kernel(pmd, address)                 \
+       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+               NULL: pte_offset_kernel(pmd, address))
  
  extern void free_area_init(unsigned long * zones_size);
  extern void free_area_init_node(int nid, pg_data_t *pgdat,
         unsigned long * zones_size, unsigned long zone_start_pfn, 
         unsigned long *zholes_size);
  extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
+extern void setup_per_zone_pages_min(void);
  extern void mem_init(void);
  extern void show_mem(void);
  extern void si_meminfo(struct sysinfo * val);
@@ -837,6 +867,7 @@ extern int split_vma(struct mm_struct *,
  extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
  extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
         struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
  extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
         unsigned long addr, unsigned long len, pgoff_t pgoff);
  extern void exit_mmap(struct mm_struct *);
@@ -884,20 +915,23 @@ int write_one_page(struct page *page, int wait);
                                          * turning readahead off */
  
  int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-                       unsigned long offset, unsigned long nr_to_read);
+                       pgoff_t offset, unsigned long nr_to_read);
  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
-                       unsigned long offset, unsigned long nr_to_read);
-unsigned long  page_cache_readahead(struct address_space *mapping,
+                       pgoff_t offset, unsigned long nr_to_read);
+unsigned long page_cache_readahead(struct address_space *mapping,
                           struct file_ra_state *ra,
                           struct file *filp,
-                         unsigned long offset,
+                         pgoff_t offset,
                           unsigned long size);
  void handle_ra_miss(struct address_space *mapping, 
                     struct file_ra_state *ra, pgoff_t offset);
  unsigned long max_sane_readahead(unsigned long nr);
  
  /* Do stack extension */
-extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
+extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+#ifdef CONFIG_IA64
+extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
+#endif
  
  /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
  extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -920,15 +954,19 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
         return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
  }
  
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
+struct page *vmalloc_to_page(void *addr);
+unsigned long vmalloc_to_pfn(void *addr);
+int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+                       unsigned long pfn, unsigned long size, pgprot_t);
+int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
  
-extern struct page * vmalloc_to_page(void *addr);
-extern unsigned long vmalloc_to_pfn(void *addr);
-extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
-               int write);
-extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
-int remap_pfn_range(struct vm_area_struct *, unsigned long,
-               unsigned long, unsigned long, pgprot_t);
+struct page *follow_page(struct vm_area_struct *, unsigned long address,
+                       unsigned int foll_flags);
+#define FOLL_WRITE     0x01    /* check pte is writable */
+#define FOLL_TOUCH     0x02    /* mark page accessed */
+#define FOLL_GET       0x04    /* do get_page on page */
+#define FOLL_ANON      0x08    /* give ZERO_PAGE if no pgtable */
  
  #ifdef CONFIG_XEN
  typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr, 
@@ -938,29 +976,14 @@ extern int generic_page_range(struct mm_struct *mm, unsigned long address,
  #endif
  
  #ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
  #else
-static inline void __vm_stat_account(struct mm_struct *mm,
+static inline void vm_stat_account(struct mm_struct *mm,
                         unsigned long flags, struct file *file, long pages)
  {
  }
  #endif /* CONFIG_PROC_FS */
  
-static inline void vm_stat_account(struct vm_area_struct *vma)
-{
-       __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
-                                                       vma_pages(vma));
-}
-
-static inline void vm_stat_unaccount(struct vm_area_struct *vma)
-{
-       __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
-                                                       -vma_pages(vma));
-}
-
-/* update per process rss and vm hiwater data */
-extern void update_mem_hiwater(struct task_struct *tsk);
-
  #ifndef CONFIG_DEBUG_PAGEALLOC
  static inline void
  kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/linux-2.6-xen-sparse/include/linux/skbuff.h b/linux-2.6-xen-sparse/include/linux/skbuff.h

index 13bb762638c9f805f8a84ff764a27f079b0f596a..8a235b12807b382f18b1c11f2ed99591e9d0ade5 100644 (file)
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h
@@ -137,6 +137,8 @@ struct skb_shared_info {
         unsigned int    nr_frags;
         unsigned short  tso_size;
         unsigned short  tso_segs;
+       unsigned short  ufo_size;
+       unsigned int    ip6_frag_id;
         struct sk_buff  *frag_list;
         skb_frag_t      frags[MAX_SKB_FRAGS];
  };
@@ -171,7 +173,6 @@ enum {
   *     struct sk_buff - socket buffer
   *     @next: Next buffer in list
   *     @prev: Previous buffer in list
- *     @list: List we are on
   *     @sk: Socket we are owned by
   *     @tstamp: Time we arrived
   *     @dev: Device we arrived on/are leaving by
@@ -192,6 +193,7 @@ enum {
   *     @proto_csum_valid: Protocol csum validated since arriving at localhost
   *     @proto_csum_blank: Protocol csum must be added before leaving localhost
   *     @pkt_type: Packet class
+ *     @fclone: skbuff clone status
   *     @ip_summed: Driver fed us an IP checksum
   *     @priority: Packet queueing priority
   *     @users: User count - see {datagram,tcp}.c
@@ -204,7 +206,9 @@ enum {
   *     @destructor: Destruct function
   *     @nfmark: Can be used for communication between hooks
   *     @nfct: Associated connection, if any
+ *     @ipvs_property: skbuff is owned by ipvs
   *     @nfctinfo: Relationship of this skb to the connection
+ *     @nfct_reasm: netfilter conntrack re-assembly pointer
   *     @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
   *     @tc_index: Traffic control index
   *     @tc_verd: traffic control verdict
@@ -263,13 +267,13 @@ struct sk_buff {
                                 nohdr:1,
                                 nfctinfo:3;
         __u8                    pkt_type:3,
+                               fclone:2,
  #ifndef CONFIG_XEN
-                               fclone:2;
+                               ipvs_property:1;
  #else
-                               fclone:2,
+                               ipvs_property:1,
                                 proto_csum_valid:1,
                                 proto_csum_blank:1;
-                               /* 1 bit spare */
  #endif
         __be16                  protocol;
  
@@ -277,8 +281,8 @@ struct sk_buff {
  #ifdef CONFIG_NETFILTER
         __u32                   nfmark;
         struct nf_conntrack     *nfct;
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-       __u8                    ipvs_property:1;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       struct sk_buff          *nfct_reasm;
  #endif
  #ifdef CONFIG_BRIDGE_NETFILTER
         struct nf_bridge_info   *nf_bridge;
@@ -350,6 +354,11 @@ extern void              skb_over_panic(struct sk_buff *skb, int len,
  extern void          skb_under_panic(struct sk_buff *skb, int len,
                                       void *here);
  
+extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+                       int getfrag(void *from, char *to, int offset,
+                       int len,int odd, struct sk_buff *skb),
+                       void *from, int length);
+
  struct skb_seq_state
  {
         __u32           lower_offset;
@@ -605,29 +614,46 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
   */
  
  /**
- *     __skb_queue_head - queue a buffer at the list head
+ *     __skb_queue_after - queue a buffer after a given buffer
   *     @list: list to use
+ *     @prev: place after this buffer
   *     @newsk: buffer to queue
   *
- *     Queue a buffer at the start of a list. This function takes no locks
+ *     Queue a buffer in the middle of a list. This function takes no locks
   *     and you must therefore hold required locks before calling it.
   *
   *     A buffer cannot be placed on two lists at the same time.
   */
-extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
-static inline void __skb_queue_head(struct sk_buff_head *list,
-                                   struct sk_buff *newsk)
+static inline void __skb_queue_after(struct sk_buff_head *list,
+                                    struct sk_buff *prev,
+                                    struct sk_buff *newsk)
  {
-       struct sk_buff *prev, *next;
-
+       struct sk_buff *next;
         list->qlen++;
-       prev = (struct sk_buff *)list;
+
         next = prev->next;
         newsk->next = next;
         newsk->prev = prev;
         next->prev  = prev->next = newsk;
  }
  
+/**
+ *     __skb_queue_head - queue a buffer at the list head
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the start of a list. This function takes no locks
+ *     and you must therefore hold required locks before calling it.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_head(struct sk_buff_head *list,
+                                   struct sk_buff *newsk)
+{
+       __skb_queue_after(list, (struct sk_buff *)list, newsk);
+}
+
  /**
   *     __skb_queue_tail - queue a buffer at the list tail
   *     @list: list to use
@@ -1205,6 +1231,11 @@ static inline void kunmap_skb_frag(void *vaddr)
                      prefetch(skb->next), (skb != (struct sk_buff *)(queue));   \
                      skb = skb->next)
  
+#define skb_queue_reverse_walk(queue, skb) \
+               for (skb = (queue)->prev;                                       \
+                    prefetch(skb->prev), (skb != (struct sk_buff *)(queue));   \
+                    skb = skb->prev)
+
  
  extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
                                          int noblock, int *err);
@@ -1213,8 +1244,7 @@ extern unsigned int    datagram_poll(struct file *file, struct socket *sock,
  extern int            skb_copy_datagram_iovec(const struct sk_buff *from,
                                                int offset, struct iovec *to,
                                                int size);
-extern int            skb_copy_and_csum_datagram_iovec(const
-                                                       struct sk_buff *skb,
+extern int            skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
                                                         int hlen,
                                                         struct iovec *iov);
  extern void           skb_free_datagram(struct sock *sk, struct sk_buff *skb);
@@ -1282,6 +1312,30 @@ static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *
  
  extern void __net_timestamp(struct sk_buff *skb);
  
+extern unsigned int __skb_checksum_complete(struct sk_buff *skb);
+
+/**
+ *     skb_checksum_complete - Calculate checksum of an entire packet
+ *     @skb: packet to process
+ *
+ *     This function calculates the checksum over the entire packet plus
+ *     the value of skb->csum.  The latter can be used to supply the
+ *     checksum of a pseudo header as used by TCP/UDP.  It returns the
+ *     checksum.
+ *
+ *     For protocols that contain complete checksums such as ICMP/TCP/UDP,
+ *     this function can be used to verify that checksum on received
+ *     packets.  In that case the function should return zero if the
+ *     checksum is correct.  In particular, this function will return zero
+ *     if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the
+ *     hardware has already verified the correctness of the checksum.
+ */
+static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
+{
+       return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+               __skb_checksum_complete(skb);
+}
+
  #ifdef CONFIG_NETFILTER
  static inline void nf_conntrack_put(struct nf_conntrack *nfct)
  {
@@ -1293,10 +1347,26 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
         if (nfct)
                 atomic_inc(&nfct->use);
  }
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
+{
+       if (skb)
+               atomic_inc(&skb->users);
+}
+static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
+{
+       if (skb)
+               kfree_skb(skb);
+}
+#endif
  static inline void nf_reset(struct sk_buff *skb)
  {
         nf_conntrack_put(skb->nfct);
         skb->nfct = NULL;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(skb->nfct_reasm);
+       skb->nfct_reasm = NULL;
+#endif
  }
  
  #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/linux-2.6-xen-sparse/kernel/irq/manage.c b/linux-2.6-xen-sparse/kernel/irq/manage.c

index bac0c49512b3b4f4b65c8ef9b6cb624f3cabe2d4..84c96f0c1c97fd625929f9cd0d593f87c8c284be 100644 (file)
--- a/linux-2.6-xen-sparse/kernel/irq/manage.c
+++ b/linux-2.6-xen-sparse/kernel/irq/manage.c
@@ -24,6 +24,7 @@ cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
  
  /**
   *     synchronize_irq - wait for pending IRQ handlers (on other CPUs)
+ *     @irq: interrupt number to wait for
   *
   *     This function waits for any pending IRQ handlers for this interrupt
   *     to complete before returning. If you use this function while
@@ -35,6 +36,9 @@ void synchronize_irq(unsigned int irq)
  {
         struct irq_desc *desc = irq_desc + irq;
  
+       if (irq >= NR_IRQS)
+               return;
+
         while (desc->status & IRQ_INPROGRESS)
                 cpu_relax();
  }
@@ -59,6 +63,9 @@ void disable_irq_nosync(unsigned int irq)
         irq_desc_t *desc = irq_desc + irq;
         unsigned long flags;
  
+       if (irq >= NR_IRQS)
+               return;
+
         spin_lock_irqsave(&desc->lock, flags);
         if (!desc->depth++) {
                 desc->status |= IRQ_DISABLED;
@@ -85,6 +92,9 @@ void disable_irq(unsigned int irq)
  {
         irq_desc_t *desc = irq_desc + irq;
  
+       if (irq >= NR_IRQS)
+               return;
+
         disable_irq_nosync(irq);
         if (desc->action)
                 synchronize_irq(irq);
@@ -107,6 +117,9 @@ void enable_irq(unsigned int irq)
         irq_desc_t *desc = irq_desc + irq;
         unsigned long flags;
  
+       if (irq >= NR_IRQS)
+               return;
+
         spin_lock_irqsave(&desc->lock, flags);
         switch (desc->depth) {
         case 0:
@@ -167,6 +180,9 @@ int setup_irq(unsigned int irq, struct irqaction * new)
         unsigned long flags;
         int shared = 0;
  
+       if (irq >= NR_IRQS)
+               return -EINVAL;
+
         if (desc->handler == &no_irq_type)
                 return -ENOSYS;
         /*
diff --git a/linux-2.6-xen-sparse/lib/Kconfig.debug b/linux-2.6-xen-sparse/lib/Kconfig.debug

index 499ad62b79961c43d23071706d96394998c5b1d4..21ce018d0748cdab35096975ae2f281b0eb936a6 100644 (file)
--- a/linux-2.6-xen-sparse/lib/Kconfig.debug
+++ b/linux-2.6-xen-sparse/lib/Kconfig.debug
@@ -128,7 +128,7 @@ config DEBUG_HIGHMEM
  config DEBUG_BUGVERBOSE
         bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
         depends on BUG
-       depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || (X86 && !X86_64) || FRV
+       depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
         default !EMBEDDED
         help
           Say Y here to make BUG() panics output the file name and line number
@@ -168,13 +168,34 @@ config DEBUG_FS
  
           If unsure, say N.
  
+config DEBUG_VM
+       bool "Debug VM"
+       depends on DEBUG_KERNEL
+       help
+         Enable this to debug the virtual-memory system.
+
+         If unsure, say N.
+
  config FRAME_POINTER
         bool "Compile the kernel with frame pointers"
         depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML)
         default y if DEBUG_INFO && UML
         help
           If you say Y here the resulting kernel image will be slightly larger
-         and slower, but it might give very useful debugging information
-         on some architectures or you use external debuggers.
+         and slower, but it might give very useful debugging information on
+         some architectures or if you use external debuggers.
           If you don't debug the kernel, you can say N.
  
+config RCU_TORTURE_TEST
+       tristate "torture tests for RCU"
+       depends on DEBUG_KERNEL
+       default n
+       help
+         This option provides a kernel module that runs torture tests
+         on the RCU infrastructure.  The kernel module may be built
+         after the fact on the running kernel to be tested, if desired.
+
+         Say Y here if you want RCU torture tests to start automatically
+         at boot time (you probably don't).
+         Say M if you want the RCU torture tests to build as a module.
+         Say N if you are unsure.
diff --git a/linux-2.6-xen-sparse/lib/Makefile b/linux-2.6-xen-sparse/lib/Makefile

new file mode 100644 (file)

index 0000000..fa1afe8
--- /dev/null
+++ b/linux-2.6-xen-sparse/lib/Makefile
@@ -0,0 +1,60 @@
+#
+# Makefile for some libs needed in the kernel.
+#
+
+lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
+        bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
+        idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
+        sha1.o
+
+lib-y  += kobject.o kref.o kobject_uevent.o klist.o
+
+obj-y += sort.o parser.o halfmd4.o
+
+ifeq ($(CONFIG_DEBUG_KOBJECT),y)
+CFLAGS_kobject.o += -DDEBUG
+CFLAGS_kobject_uevent.o += -DDEBUG
+endif
+
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
+lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
+lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+
+ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
+  lib-y += dec_and_lock.o
+endif
+
+obj-$(CONFIG_CRC_CCITT)        += crc-ccitt.o
+obj-$(CONFIG_CRC16)    += crc16.o
+obj-$(CONFIG_CRC32)    += crc32.o
+obj-$(CONFIG_LIBCRC32C)        += libcrc32c.o
+obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
+
+obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
+obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
+obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
+
+obj-$(CONFIG_TEXTSEARCH) += textsearch.o
+obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
+obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
+obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
+
+ifneq ($(CONFIG_XEN),y)
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
+endif
+
+hostprogs-y    := gen_crc32table
+clean-files    := crc32table.h
+
+$(obj)/crc32.o: $(obj)/crc32table.h
+
+quiet_cmd_crc32 = GEN     $@
+      cmd_crc32 = $< > $@
+
+$(obj)/crc32table.h: $(obj)/gen_crc32table
+       $(call cmd,crc32)
diff --git a/linux-2.6-xen-sparse/mm/Kconfig b/linux-2.6-xen-sparse/mm/Kconfig

new file mode 100644 (file)

index 0000000..4338bbe
--- /dev/null
+++ b/linux-2.6-xen-sparse/mm/Kconfig
@@ -0,0 +1,137 @@
+config SELECT_MEMORY_MODEL
+       def_bool y
+       depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL
+
+choice
+       prompt "Memory model"
+       depends on SELECT_MEMORY_MODEL
+       default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
+       default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
+       default FLATMEM_MANUAL
+
+config FLATMEM_MANUAL
+       bool "Flat Memory"
+       depends on !ARCH_DISCONTIGMEM_ENABLE || ARCH_FLATMEM_ENABLE
+       help
+         This option allows you to change some of the ways that
+         Linux manages its memory internally.  Most users will
+         only have one option here: FLATMEM.  This is normal
+         and a correct option.
+
+         Some users of more advanced features like NUMA and
+         memory hotplug may have different options here.
+         DISCONTIGMEM is a more mature, better tested system,
+         but is incompatible with memory hotplug and may suffer
+         decreased performance over SPARSEMEM.  If unsure between
+         "Sparse Memory" and "Discontiguous Memory", choose
+         "Discontiguous Memory".
+
+         If unsure, choose this option (Flat Memory) over any other.
+
+config DISCONTIGMEM_MANUAL
+       bool "Discontiguous Memory"
+       depends on ARCH_DISCONTIGMEM_ENABLE
+       help
+         This option provides enhanced support for discontiguous
+         memory systems, over FLATMEM.  These systems have holes
+         in their physical address spaces, and this option provides
+         more efficient handling of these holes.  However, the vast
+         majority of hardware has quite flat address spaces, and
+         can have degraded performance from extra overhead that
+         this option imposes.
+
+         Many NUMA configurations will have this as the only option.
+
+         If unsure, choose "Flat Memory" over this option.
+
+config SPARSEMEM_MANUAL
+       bool "Sparse Memory"
+       depends on ARCH_SPARSEMEM_ENABLE
+       help
+         This will be the only option for some systems, including
+         memory hotplug systems.  This is normal.
+
+         For many other systems, this will be an alternative to
+         "Discontiguous Memory".  This option provides some potential
+         performance benefits, along with decreased code complexity,
+         but it is newer, and more experimental.
+
+         If unsure, choose "Discontiguous Memory" or "Flat Memory"
+         over this option.
+
+endchoice
+
+config DISCONTIGMEM
+       def_bool y
+       depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
+
+config SPARSEMEM
+       def_bool y
+       depends on SPARSEMEM_MANUAL
+
+config FLATMEM
+       def_bool y
+       depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
+
+config FLAT_NODE_MEM_MAP
+       def_bool y
+       depends on !SPARSEMEM
+
+#
+# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
+# to represent different areas of memory.  This variable allows
+# those dependencies to exist individually.
+#
+config NEED_MULTIPLE_NODES
+       def_bool y
+       depends on DISCONTIGMEM || NUMA
+
+config HAVE_MEMORY_PRESENT
+       def_bool y
+       depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
+
+#
+# SPARSEMEM_EXTREME (which is the default) does some bootmem
+# allocations when memory_present() is called.  If this can not
+# be done on your architecture, select this option.  However,
+# statically allocating the mem_section[] array can potentially
+# consume vast quantities of .bss, so be careful.
+#
+# This option will also potentially produce smaller runtime code
+# with gcc 3.4 and later.
+#
+config SPARSEMEM_STATIC
+       def_bool n
+
+#
+# Architecture platforms which require a two level mem_section in SPARSEMEM
+# must select this option. This is usually for architecture platforms with
+# an extremely sparse physical address space.
+#
+config SPARSEMEM_EXTREME
+       def_bool y
+       depends on SPARSEMEM && !SPARSEMEM_STATIC
+
+# eventually, we can have this option just 'select SPARSEMEM'
+config MEMORY_HOTPLUG
+       bool "Allow for memory hot-add"
+       depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+
+comment "Memory hotplug is currently incompatible with Software Suspend"
+       depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
+
+# Heavily threaded applications may benefit from splitting the mm-wide
+# page_table_lock, so that faults on different parts of the user address
+# space can be handled with less contention: split it at this NR_CPUS.
+# Default to 4 for wider testing, though 8 might be more appropriate.
+# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
+# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+# XEN uses the mapping field on pagetable pages to store a pointer to
+# the destructor.
+#
+config SPLIT_PTLOCK_CPUS
+       int
+       default "4096" if ARM && !CPU_CACHE_VIPT
+       default "4096" if PARISC && !PA20
+       default "4096" if XEN
+       default "4"
diff --git a/linux-2.6-xen-sparse/mm/highmem.c b/linux-2.6-xen-sparse/mm/highmem.c

index b22595fb845bb050e93ab29949e7db2c3106f062..b29bf621ecc4b220cbfea42ed95beb67d6aff47d 100644 (file)
--- a/linux-2.6-xen-sparse/mm/highmem.c
+++ b/linux-2.6-xen-sparse/mm/highmem.c
@@ -30,11 +30,9 @@
  
  static mempool_t *page_pool, *isa_page_pool;
  
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
+static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
  {
-       unsigned int gfp = gfp_mask | (unsigned int) (long) data;
-
-       return alloc_page(gfp);
+       return alloc_page(gfp_mask | GFP_DMA);
  }
  
  static void page_pool_free(void *page, void *data)
@@ -51,6 +49,12 @@ static void page_pool_free(void *page, void *data)
   *  n means that there are (n-1) current users of it.
   */
  #ifdef CONFIG_HIGHMEM
+
+static void *page_pool_alloc(gfp_t gfp_mask, void *data)
+{
+       return alloc_page(gfp_mask);
+}
+
  static int pkmap_count[LAST_PKMAP];
  static unsigned int last_pkmap_nr;
  static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
@@ -278,7 +282,7 @@ int init_emergency_isa_pool(void)
         if (isa_page_pool)
                 return 0;
  
-       isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA);
+       isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
         if (!isa_page_pool)
                 BUG();
  
diff --git a/linux-2.6-xen-sparse/mm/memory.c b/linux-2.6-xen-sparse/mm/memory.c

index 4e359728a2aaac85b0e5d4d92b610702c8f34d41..a00a1057bfe42be9f53631ca8ebc05fe254daf60 100644 (file)
--- a/linux-2.6-xen-sparse/mm/memory.c
+++ b/linux-2.6-xen-sparse/mm/memory.c
@@ -114,6 +114,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
  {
         struct page *page = pmd_page(*pmd);
         pmd_clear(pmd);
+       pte_lock_deinit(page);
         pte_free_tlb(tlb, page);
         dec_page_state(nr_page_table_pages);
         tlb->mm->nr_ptes--;
@@ -249,7 +250,7 @@ void free_pgd_range(struct mmu_gather **tlb,
                 free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
         } while (pgd++, addr = next, addr != end);
  
-       if (!tlb_is_full_mm(*tlb))
+       if (!(*tlb)->fullmm)
                 flush_tlb_pgtables((*tlb)->mm, start, end);
  }
  
@@ -260,6 +261,12 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
                 struct vm_area_struct *next = vma->vm_next;
                 unsigned long addr = vma->vm_start;
  
+               /*
+                * Hide vma from rmap and vmtruncate before freeing pgtables
+                */
+               anon_vma_unlink(vma);
+               unlink_file_vma(vma);
+
                 if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
                         hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
                                 floor, next? next->vm_start: ceiling);
@@ -272,6 +279,8 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
                                                         HPAGE_SIZE)) {
                                 vma = next;
                                 next = vma->vm_next;
+                               anon_vma_unlink(vma);
+                               unlink_file_vma(vma);
                         }
                         free_pgd_range(tlb, addr, vma->vm_end,
                                 floor, next? next->vm_start: ceiling);
@@ -280,75 +289,141 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
         }
  }
  
-pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
-                               unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
  {
-       if (!pmd_present(*pmd)) {
-               struct page *new;
+       struct page *new = pte_alloc_one(mm, address);
+       if (!new)
+               return -ENOMEM;
  
-               spin_unlock(&mm->page_table_lock);
-               new = pte_alloc_one(mm, address);
-               spin_lock(&mm->page_table_lock);
-               if (!new)
-                       return NULL;
-               /*
-                * Because we dropped the lock, we should re-check the
-                * entry, as somebody else could have populated it..
-                */
-               if (pmd_present(*pmd)) {
-                       pte_free(new);
-                       goto out;
-               }
+       pte_lock_init(new);
+       spin_lock(&mm->page_table_lock);
+       if (pmd_present(*pmd)) {        /* Another has populated it */
+               pte_lock_deinit(new);
+               pte_free(new);
+       } else {
                 mm->nr_ptes++;
                 inc_page_state(nr_page_table_pages);
                 pmd_populate(mm, pmd, new);
         }
-out:
-       return pte_offset_map(pmd, address);
+       spin_unlock(&mm->page_table_lock);
+       return 0;
+}
+
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+{
+       pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+       if (!new)
+               return -ENOMEM;
+
+       spin_lock(&init_mm.page_table_lock);
+       if (pmd_present(*pmd))          /* Another has populated it */
+               pte_free_kernel(new);
+       else
+               pmd_populate_kernel(&init_mm, pmd, new);
+       spin_unlock(&init_mm.page_table_lock);
+       return 0;
+}
+
+static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+{
+       if (file_rss)
+               add_mm_counter(mm, file_rss, file_rss);
+       if (anon_rss)
+               add_mm_counter(mm, anon_rss, anon_rss);
  }
  
-pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+/*
+ * This function is called to print an error when a bad pte
+ * is found. For example, we might have a PFN-mapped pte in
+ * a region that doesn't allow it.
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+       printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+                       "vm_flags = %lx, vaddr = %lx\n",
+               (long long)pte_val(pte),
+               (vma->vm_mm == current->mm ? current->comm : "???"),
+               vma->vm_flags, vaddr);
+       dump_stack();
+}
+
+static inline int is_cow_mapping(unsigned int flags)
+{
+       return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
+/*
+ * This function gets the "struct page" associated with a pte.
+ *
+ * NOTE! Some mappings do not have "struct pages". A raw PFN mapping
+ * will have each page table entry just pointing to a raw page frame
+ * number, and as far as the VM layer is concerned, those do not have
+ * pages associated with them - even if the PFN might point to memory
+ * that otherwise is perfectly fine and has a "struct page".
+ *
+ * The way we recognize those mappings is through the rules set up
+ * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set,
+ * and the vm_pgoff will point to the first PFN mapped: thus every
+ * page that is a raw mapping will always honor the rule
+ *
+ *     pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
+ *
+ * and if that isn't true, the page has been COW'ed (in which case it
+ * _does_ have a "struct page" associated with it even if it is in a
+ * VM_PFNMAP range).
+ */
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
  {
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
+       unsigned long pfn = pte_pfn(pte);
  
-               spin_unlock(&mm->page_table_lock);
-               new = pte_alloc_one_kernel(mm, address);
-               spin_lock(&mm->page_table_lock);
-               if (!new)
+       if (vma->vm_flags & VM_PFNMAP) {
+               unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
+               if (pfn == vma->vm_pgoff + off)
+                       return NULL;
+               if (!is_cow_mapping(vma->vm_flags))
                         return NULL;
+       }
  
-               /*
-                * Because we dropped the lock, we should re-check the
-                * entry, as somebody else could have populated it..
-                */
-               if (pmd_present(*pmd)) {
-                       pte_free_kernel(new);
-                       goto out;
-               }
-               pmd_populate_kernel(mm, pmd, new);
+       /*
+        * Add some anal sanity checks for now. Eventually,
+        * we should just do "return pfn_to_page(pfn)", but
+        * in the meantime we check that we get a valid pfn,
+        * and that the resulting page looks ok.
+        *
+        * Remove this test eventually!
+        */
+       if (unlikely(!pfn_valid(pfn))) {
+               if (!(vma->vm_flags & VM_RESERVED))
+                       print_bad_pte(vma, pte, addr);
+               return NULL;
         }
-out:
-       return pte_offset_kernel(pmd, address);
+
+       /*
+        * NOTE! We still have PageReserved() pages in the page 
+        * tables. 
+        *
+        * The ZERO_PAGE() pages and various VDSO mappings can
+        * cause them to exist.
+        */
+       return pfn_to_page(pfn);
  }
  
  /*
   * copy one vm_area from one task to the other. Assumes the page tables
   * already present in the new task to be cleared in the whole range
   * covered by this vma.
- *
- * dst->page_table_lock is held on entry and exit,
- * but may be dropped within p[mg]d_alloc() and pte_alloc_map().
   */
  
  static inline void
  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-               pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
-               unsigned long addr)
+               pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+               unsigned long addr, int *rss)
  {
+       unsigned long vm_flags = vma->vm_flags;
         pte_t pte = *src_pte;
         struct page *page;
-       unsigned long pfn;
  
         /* pte contains position in swap or file, so copy. */
         if (unlikely(!pte_present(pte))) {
@@ -357,34 +432,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                         /* make sure dst_mm is on swapoff's mmlist. */
                         if (unlikely(list_empty(&dst_mm->mmlist))) {
                                 spin_lock(&mmlist_lock);
-                               list_add(&dst_mm->mmlist, &src_mm->mmlist);
+                               if (list_empty(&dst_mm->mmlist))
+                                       list_add(&dst_mm->mmlist,
+                                                &src_mm->mmlist);
                                 spin_unlock(&mmlist_lock);
                         }
                 }
-               set_pte_at(dst_mm, addr, dst_pte, pte);
-               return;
-       }
-
-       pfn = pte_pfn(pte);
-       /* the pte points outside of valid memory, the
-        * mapping is assumed to be good, meaningful
-        * and not mapped via rmap - duplicate the
-        * mapping as is.
-        */
-       page = NULL;
-       if (pfn_valid(pfn))
-               page = pfn_to_page(pfn);
-
-       if (!page || PageReserved(page)) {
-               set_pte_at(dst_mm, addr, dst_pte, pte);
-               return;
+               goto out_set_pte;
         }
  
         /*
          * If it's a COW mapping, write protect it both
          * in the parent and the child
          */
-       if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
+       if (is_cow_mapping(vm_flags)) {
                 ptep_set_wrprotect(src_mm, addr, src_pte);
                 pte = *src_pte;
         }
@@ -396,12 +457,16 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         if (vm_flags & VM_SHARED)
                 pte = pte_mkclean(pte);
         pte = pte_mkold(pte);
-       get_page(page);
-       inc_mm_counter(dst_mm, rss);
-       if (PageAnon(page))
-               inc_mm_counter(dst_mm, anon_rss);
+
+       page = vm_normal_page(vma, addr, pte);
+       if (page) {
+               get_page(page);
+               page_dup_rmap(page);
+               rss[!!PageAnon(page)]++;
+       }
+
+out_set_pte:
         set_pte_at(dst_mm, addr, dst_pte, pte);
-       page_dup_rmap(page);
  }
  
  static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -409,38 +474,44 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 unsigned long addr, unsigned long end)
  {
         pte_t *src_pte, *dst_pte;
-       unsigned long vm_flags = vma->vm_flags;
-       int progress;
+       spinlock_t *src_ptl, *dst_ptl;
+       int progress = 0;
+       int rss[2];
  
  again:
-       dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+       rss[1] = rss[0] = 0;
+       dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
         if (!dst_pte)
                 return -ENOMEM;
         src_pte = pte_offset_map_nested(src_pmd, addr);
+       src_ptl = pte_lockptr(src_mm, src_pmd);
+       spin_lock(src_ptl);
  
-       progress = 0;
-       spin_lock(&src_mm->page_table_lock);
         do {
                 /*
                  * We are holding two locks at this point - either of them
                  * could generate latencies in another task on another CPU.
                  */
-               if (progress >= 32 && (need_resched() ||
-                   need_lockbreak(&src_mm->page_table_lock) ||
-                   need_lockbreak(&dst_mm->page_table_lock)))
-                       break;
+               if (progress >= 32) {
+                       progress = 0;
+                       if (need_resched() ||
+                           need_lockbreak(src_ptl) ||
+                           need_lockbreak(dst_ptl))
+                               break;
+               }
                 if (pte_none(*src_pte)) {
                         progress++;
                         continue;
                 }
-               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
                 progress += 8;
         } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
-       spin_unlock(&src_mm->page_table_lock);
  
+       spin_unlock(src_ptl);
         pte_unmap_nested(src_pte - 1);
-       pte_unmap(dst_pte - 1);
-       cond_resched_lock(&dst_mm->page_table_lock);
+       add_mm_rss(dst_mm, rss[0], rss[1]);
+       pte_unmap_unlock(dst_pte - 1, dst_ptl);
+       cond_resched();
         if (addr != end)
                 goto again;
         return 0;
@@ -504,7 +575,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
          * readonly mappings. The tradeoff is that copy_page_range is more
          * efficient than faulting.
          */
-       if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+       if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
                 if (!vma->anon_vma)
                         return 0;
         }
@@ -525,25 +596,30 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         return 0;
  }
  
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pmd_t *pmd,
                                 unsigned long addr, unsigned long end,
-                               struct zap_details *details)
+                               long *zap_work, struct zap_details *details)
  {
+       struct mm_struct *mm = tlb->mm;
         pte_t *pte;
+       spinlock_t *ptl;
+       int file_rss = 0;
+       int anon_rss = 0;
  
-       pte = pte_offset_map(pmd, addr);
+       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
         do {
                 pte_t ptent = *pte;
-               if (pte_none(ptent))
+               if (pte_none(ptent)) {
+                       (*zap_work)--;
                         continue;
+               }
                 if (pte_present(ptent)) {
-                       struct page *page = NULL;
-                       unsigned long pfn = pte_pfn(ptent);
-                       if (pfn_valid(pfn)) {
-                               page = pfn_to_page(pfn);
-                               if (PageReserved(page))
-                                       page = NULL;
-                       }
+                       struct page *page;
+
+                       (*zap_work) -= PAGE_SIZE;
+
+                       page = vm_normal_page(vma, addr, ptent);
                         if (unlikely(details) && page) {
                                 /*
                                  * unmap_shared_mapping_pages() wants to
@@ -562,7 +638,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                                      page->index > details->last_index))
                                         continue;
                         }
-                       ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+                       ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                         tlb->fullmm);
                         tlb_remove_tlb_entry(tlb, pte, addr);
                         if (unlikely(!page))
@@ -570,15 +646,17 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                         if (unlikely(details) && details->nonlinear_vma
                             && linear_page_index(details->nonlinear_vma,
                                                 addr) != page->index)
-                               set_pte_at(tlb->mm, addr, pte,
+                               set_pte_at(mm, addr, pte,
                                            pgoff_to_pte(page->index));
-                       if (pte_dirty(ptent))
-                               set_page_dirty(page);
                         if (PageAnon(page))
-                               dec_mm_counter(tlb->mm, anon_rss);
-                       else if (pte_young(ptent))
-                               mark_page_accessed(page);
-                       tlb->freed++;
+                               anon_rss--;
+                       else {
+                               if (pte_dirty(ptent))
+                                       set_page_dirty(page);
+                               if (pte_young(ptent))
+                                       mark_page_accessed(page);
+                               file_rss--;
+                       }
                         page_remove_rmap(page);
                         tlb_remove_page(tlb, page);
                         continue;
@@ -591,14 +669,19 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                         continue;
                 if (!pte_file(ptent))
                         free_swap_and_cache(pte_to_swp_entry(ptent));
-               pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
-       } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap(pte - 1);
+               pte_clear_full(mm, addr, pte, tlb->fullmm);
+       } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+
+       add_mm_rss(mm, file_rss, anon_rss);
+       pte_unmap_unlock(pte - 1, ptl);
+
+       return addr;
  }
  
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pud_t *pud,
                                 unsigned long addr, unsigned long end,
-                               struct zap_details *details)
+                               long *zap_work, struct zap_details *details)
  {
         pmd_t *pmd;
         unsigned long next;
@@ -606,15 +689,21 @@ static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
         pmd = pmd_offset(pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
-               if (pmd_none_or_clear_bad(pmd))
+               if (pmd_none_or_clear_bad(pmd)) {
+                       (*zap_work)--;
                         continue;
-               zap_pte_range(tlb, pmd, addr, next, details);
-       } while (pmd++, addr = next, addr != end);
+               }
+               next = zap_pte_range(tlb, vma, pmd, addr, next,
+                                               zap_work, details);
+       } while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+       return addr;
  }
  
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pgd_t *pgd,
                                 unsigned long addr, unsigned long end,
-                               struct zap_details *details)
+                               long *zap_work, struct zap_details *details)
  {
         pud_t *pud;
         unsigned long next;
@@ -622,15 +711,21 @@ static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
         pud = pud_offset(pgd, addr);
         do {
                 next = pud_addr_end(addr, end);
-               if (pud_none_or_clear_bad(pud))
+               if (pud_none_or_clear_bad(pud)) {
+                       (*zap_work)--;
                         continue;
-               zap_pmd_range(tlb, pud, addr, next, details);
-       } while (pud++, addr = next, addr != end);
+               }
+               next = zap_pmd_range(tlb, vma, pud, addr, next,
+                                               zap_work, details);
+       } while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+       return addr;
  }
  
-static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma,
                                 unsigned long addr, unsigned long end,
-                               struct zap_details *details)
+                               long *zap_work, struct zap_details *details)
  {
         pgd_t *pgd;
         unsigned long next;
@@ -643,11 +738,16 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
         pgd = pgd_offset(vma->vm_mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
-               if (pgd_none_or_clear_bad(pgd))
+               if (pgd_none_or_clear_bad(pgd)) {
+                       (*zap_work)--;
                         continue;
-               zap_pud_range(tlb, pgd, addr, next, details);
-       } while (pgd++, addr = next, addr != end);
+               }
+               next = zap_pud_range(tlb, vma, pgd, addr, next,
+                                               zap_work, details);
+       } while (pgd++, addr = next, (addr != end && *zap_work > 0));
         tlb_end_vma(tlb, vma);
+
+       return addr;
  }
  
  #ifdef CONFIG_PREEMPT
@@ -660,7 +760,6 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  /**
   * unmap_vmas - unmap a range of memory covered by a list of vma's
   * @tlbp: address of the caller's struct mmu_gather
- * @mm: the controlling mm_struct
   * @vma: the starting vma
   * @start_addr: virtual address at which to start unmapping
   * @end_addr: virtual address at which to end unmapping
@@ -669,10 +768,10 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
   *
   * Returns the end address of the unmapping (restart addr if interrupted).
   *
- * Unmap all pages in the vma list.  Called under page_table_lock.
+ * Unmap all pages in the vma list.
   *
- * We aim to not hold page_table_lock for too long (for scheduling latency
- * reasons).  So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
+ * We aim to not hold locks for too long (for scheduling latency reasons).
+ * So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
   * return the ending mmu_gather to the caller.
   *
   * Only addresses between `start' and `end' will be unmapped.
@@ -684,17 +783,17 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
   * drops the lock and schedules.
   */
-unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp,
                 struct vm_area_struct *vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *details)
  {
-       unsigned long zap_bytes = ZAP_BLOCK_SIZE;
+       long zap_work = ZAP_BLOCK_SIZE;
         unsigned long tlb_start = 0;    /* For tlb_finish_mmu */
         int tlb_start_valid = 0;
         unsigned long start = start_addr;
         spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
-       int fullmm = tlb_is_full_mm(*tlbp);
+       int fullmm = (*tlbp)->fullmm;
  
         for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
                 unsigned long end;
@@ -710,45 +809,39 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                         *nr_accounted += (end - start) >> PAGE_SHIFT;
  
                 while (start != end) {
-                       unsigned long block;
-
                         if (!tlb_start_valid) {
                                 tlb_start = start;
                                 tlb_start_valid = 1;
                         }
  
-                       if (is_vm_hugetlb_page(vma)) {
-                               block = end - start;
+                       if (unlikely(is_vm_hugetlb_page(vma))) {
                                 unmap_hugepage_range(vma, start, end);
-                       } else {
-                               block = min(zap_bytes, end - start);
-                               unmap_page_range(*tlbp, vma, start,
-                                               start + block, details);
+                               zap_work -= (end - start) /
+                                               (HPAGE_SIZE / PAGE_SIZE);
+                               start = end;
+                       } else
+                               start = unmap_page_range(*tlbp, vma,
+                                               start, end, &zap_work, details);
+
+                       if (zap_work > 0) {
+                               BUG_ON(start != end);
+                               break;
                         }
  
-                       start += block;
-                       zap_bytes -= block;
-                       if ((long)zap_bytes > 0)
-                               continue;
-
                         tlb_finish_mmu(*tlbp, tlb_start, start);
  
                         if (need_resched() ||
-                               need_lockbreak(&mm->page_table_lock) ||
                                 (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
                                 if (i_mmap_lock) {
-                                       /* must reset count of rss freed */
-                                       *tlbp = tlb_gather_mmu(mm, fullmm);
+                                       *tlbp = NULL;
                                         goto out;
                                 }
-                               spin_unlock(&mm->page_table_lock);
                                 cond_resched();
-                               spin_lock(&mm->page_table_lock);
                         }
  
-                       *tlbp = tlb_gather_mmu(mm, fullmm);
+                       *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
                         tlb_start_valid = 0;
-                       zap_bytes = ZAP_BLOCK_SIZE;
+                       zap_work = ZAP_BLOCK_SIZE;
                 }
         }
  out:
@@ -770,123 +863,92 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
         unsigned long end = address + size;
         unsigned long nr_accounted = 0;
  
-       if (is_vm_hugetlb_page(vma)) {
-               zap_hugepage_range(vma, address, size);
-               return end;
-       }
-
         lru_add_drain();
-       spin_lock(&mm->page_table_lock);
         tlb = tlb_gather_mmu(mm, 0);
-       end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
-       tlb_finish_mmu(tlb, address, end);
-       spin_unlock(&mm->page_table_lock);
+       update_hiwater_rss(mm);
+       end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+       if (tlb)
+               tlb_finish_mmu(tlb, address, end);
         return end;
  }
  
  /*
   * Do a quick page-table lookup for a single page.
- * mm->page_table_lock must be held.
   */
-static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
-                       int read, int write, int accessed)
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+                       unsigned int flags)
  {
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
         pte_t *ptep, pte;
-       unsigned long pfn;
+       spinlock_t *ptl;
         struct page *page;
+       struct mm_struct *mm = vma->vm_mm;
  
-       page = follow_huge_addr(mm, address, write);
-       if (! IS_ERR(page))
-               return page;
+       page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
+       if (!IS_ERR(page)) {
+               BUG_ON(flags & FOLL_GET);
+               goto out;
+       }
  
+       page = NULL;
         pgd = pgd_offset(mm, address);
         if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-               goto out;
+               goto no_page_table;
  
         pud = pud_offset(pgd, address);
         if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-               goto out;
+               goto no_page_table;
         
         pmd = pmd_offset(pud, address);
         if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+               goto no_page_table;
+
+       if (pmd_huge(*pmd)) {
+               BUG_ON(flags & FOLL_GET);
+               page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
                 goto out;
-       if (pmd_huge(*pmd))
-               return follow_huge_pmd(mm, address, pmd, write);
+       }
  
-       ptep = pte_offset_map(pmd, address);
+       ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
         if (!ptep)
                 goto out;
  
         pte = *ptep;
-       pte_unmap(ptep);
-       if (pte_present(pte)) {
-               if (write && !pte_write(pte))
-                       goto out;
-               if (read && !pte_read(pte))
-                       goto out;
-               pfn = pte_pfn(pte);
-               if (pfn_valid(pfn)) {
-                       page = pfn_to_page(pfn);
-                       if (accessed) {
-                               if (write && !pte_dirty(pte) &&!PageDirty(page))
-                                       set_page_dirty(page);
-                               mark_page_accessed(page);
-                       }
-                       return page;
-               }
+       if (!pte_present(pte))
+               goto unlock;
+       if ((flags & FOLL_WRITE) && !pte_write(pte))
+               goto unlock;
+       page = vm_normal_page(vma, address, pte);
+       if (unlikely(!page))
+               goto unlock;
+
+       if (flags & FOLL_GET)
+               get_page(page);
+       if (flags & FOLL_TOUCH) {
+               if ((flags & FOLL_WRITE) &&
+                   !pte_dirty(pte) && !PageDirty(page))
+                       set_page_dirty(page);
+               mark_page_accessed(page);
         }
-
+unlock:
+       pte_unmap_unlock(ptep, ptl);
  out:
-       return NULL;
-}
-
-inline struct page *
-follow_page(struct mm_struct *mm, unsigned long address, int write)
-{
-       return __follow_page(mm, address, 0, write, 1);
-}
-
-/*
- * check_user_page_readable() can be called frm niterrupt context by oprofile,
- * so we need to avoid taking any non-irq-safe locks
- */
-int check_user_page_readable(struct mm_struct *mm, unsigned long address)
-{
-       return __follow_page(mm, address, 1, 0, 0) != NULL;
-}
-EXPORT_SYMBOL(check_user_page_readable);
-
-static inline int
-untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
-                        unsigned long address)
-{
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-
-       /* Check if the vma is for an anonymous mapping. */
-       if (vma->vm_ops && vma->vm_ops->nopage)
-               return 0;
-
-       /* Check if page directory entry exists. */
-       pgd = pgd_offset(mm, address);
-       if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-               return 1;
-
-       pud = pud_offset(pgd, address);
-       if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-               return 1;
-
-       /* Check if page middle directory entry exists. */
-       pmd = pmd_offset(pud, address);
-       if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-               return 1;
+       return page;
  
-       /* There is a pte slot for 'address' in 'mm'. */
-       return 0;
+no_page_table:
+       /*
+        * When core dumping an enormous anonymous area that nobody
+        * has touched so far, we don't want to allocate page tables.
+        */
+       if (flags & FOLL_ANON) {
+               page = ZERO_PAGE(address);
+               if (flags & FOLL_GET)
+                       get_page(page);
+               BUG_ON(flags & FOLL_WRITE);
+       }
+       return page;
  }
  
  int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
@@ -894,18 +956,19 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 struct page **pages, struct vm_area_struct **vmas)
  {
         int i;
-       unsigned int flags;
+       unsigned int vm_flags;
  
         /* 
          * Require read or write permissions.
          * If 'force' is set, we only require the "MAY" flags.
          */
-       flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
-       flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+       vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+       vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
         i = 0;
  
         do {
-               struct vm_area_struct * vma;
+               struct vm_area_struct *vma;
+               unsigned int foll_flags;
  
                 vma = find_extend_vma(mm, start);
                 if (!vma && in_gate_area(tsk, start)) {
@@ -933,8 +996,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 return i ? : -EFAULT;
                         }
                         if (pages) {
-                               pages[i] = pte_page(*pte);
-                               get_page(pages[i]);
+                               struct page *page = vm_normal_page(gate_vma, start, *pte);
+                               pages[i] = page;
+                               if (page)
+                                       get_page(page);
                         }
                         pte_unmap(pte);
                         if (vmas)
@@ -962,8 +1027,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         }
                  }
  #endif
-               if (!vma || (vma->vm_flags & VM_IO)
-                               || !(flags & vma->vm_flags))
+               if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
+                               || !(vm_flags & vma->vm_flags))
                         return i ? : -EFAULT;
  
                 if (is_vm_hugetlb_page(vma)) {
@@ -971,29 +1036,25 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                                 &start, &len, i);
                         continue;
                 }
-               spin_lock(&mm->page_table_lock);
+
+               foll_flags = FOLL_TOUCH;
+               if (pages)
+                       foll_flags |= FOLL_GET;
+               if (!write && !(vma->vm_flags & VM_LOCKED) &&
+                   (!vma->vm_ops || !vma->vm_ops->nopage))
+                       foll_flags |= FOLL_ANON;
+
                 do {
-                       int write_access = write;
                         struct page *page;
  
-                       cond_resched_lock(&mm->page_table_lock);
-                       while (!(page = follow_page(mm, start, write_access))) {
-                               int ret;
-
-                               /*
-                                * Shortcut for anonymous pages. We don't want
-                                * to force the creation of pages tables for
-                                * insanely big anonymously mapped areas that
-                                * nobody touched so far. This is important
-                                * for doing a core dump for these mappings.
-                                */
-                               if (!write && untouched_anonymous_page(mm,vma,start)) {
-                                       page = ZERO_PAGE(start);
-                                       break;
-                               }
-                               spin_unlock(&mm->page_table_lock);
-                               ret = __handle_mm_fault(mm, vma, start, write_access);
+                       if (write)
+                               foll_flags |= FOLL_WRITE;
  
+                       cond_resched();
+                       while (!(page = follow_page(vma, start, foll_flags))) {
+                               int ret;
+                               ret = __handle_mm_fault(mm, vma, start,
+                                               foll_flags & FOLL_WRITE);
                                 /*
                                  * The VM_FAULT_WRITE bit tells us that do_wp_page has
                                  * broken COW when necessary, even if maybe_mkwrite
@@ -1001,7 +1062,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                  * subsequent page lookups as if they were reads.
                                  */
                                 if (ret & VM_FAULT_WRITE)
-                                       write_access = 0;
+                                       foll_flags &= ~FOLL_WRITE;
                                 
                                 switch (ret & ~VM_FAULT_WRITE) {
                                 case VM_FAULT_MINOR:
@@ -1017,13 +1078,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 default:
                                         BUG();
                                 }
-                               spin_lock(&mm->page_table_lock);
                         }
                         if (pages) {
                                 pages[i] = page;
                                 flush_dcache_page(page);
-                               if (!PageReserved(page))
-                                       page_cache_get(page);
                         }
                         if (vmas)
                                 vmas[i] = vma;
@@ -1031,7 +1089,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         start += PAGE_SIZE;
                         len--;
                 } while (len && start < vma->vm_end);
-               spin_unlock(&mm->page_table_lock);
         } while (len);
         return i;
  }
@@ -1041,16 +1098,21 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
                         unsigned long addr, unsigned long end, pgprot_t prot)
  {
         pte_t *pte;
+       spinlock_t *ptl;
  
-       pte = pte_alloc_map(mm, pmd, addr);
+       pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
         if (!pte)
                 return -ENOMEM;
         do {
-               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
+               struct page *page = ZERO_PAGE(addr);
+               pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+               page_cache_get(page);
+               page_add_file_rmap(page);
+               inc_mm_counter(mm, file_rss);
                 BUG_ON(!pte_none(*pte));
                 set_pte_at(mm, addr, pte, zero_pte);
         } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap(pte - 1);
+       pte_unmap_unlock(pte - 1, ptl);
         return 0;
  }
  
@@ -1100,17 +1162,95 @@ int zeromap_page_range(struct vm_area_struct *vma,
         BUG_ON(addr >= end);
         pgd = pgd_offset(mm, addr);
         flush_cache_range(vma, addr, end);
-       spin_lock(&mm->page_table_lock);
         do {
                 next = pgd_addr_end(addr, end);
                 err = zeromap_pud_range(mm, pgd, addr, next, prot);
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
-       spin_unlock(&mm->page_table_lock);
         return err;
  }
  
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+{
+       pgd_t * pgd = pgd_offset(mm, addr);
+       pud_t * pud = pud_alloc(mm, pgd, addr);
+       if (pud) {
+               pmd_t * pmd = pmd_alloc(mm, pud, addr);
+               if (pmd)
+                       return pte_alloc_map_lock(mm, pmd, addr, ptl);
+       }
+       return NULL;
+}
+
+/*
+ * This is the old fallback for page remapping.
+ *
+ * For historical reasons, it only allows reserved pages. Only
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+{
+       int retval;
+       pte_t *pte;
+       spinlock_t *ptl;  
+
+       retval = -EINVAL;
+       if (PageAnon(page))
+               goto out;
+       retval = -ENOMEM;
+       flush_dcache_page(page);
+       pte = get_locked_pte(mm, addr, &ptl);
+       if (!pte)
+               goto out;
+       retval = -EBUSY;
+       if (!pte_none(*pte))
+               goto out_unlock;
+
+       /* Ok, finally just insert the thing.. */
+       get_page(page);
+       inc_mm_counter(mm, file_rss);
+       page_add_file_rmap(page);
+       set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+       retval = 0;
+out_unlock:
+       pte_unmap_unlock(pte, ptl);
+out:
+       return retval;
+}
+
+/*
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
+ */
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
+{
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return -EFAULT;
+       if (!page_count(page))
+               return -EINVAL;
+       vma->vm_flags |= VM_INSERTPAGE;
+       return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_page);
+
  /*
   * maps a range of physical memory into the requested pages. the old
   * mappings are removed. any references to nonexistent pages results
@@ -1121,17 +1261,17 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
                         unsigned long pfn, pgprot_t prot)
  {
         pte_t *pte;
+       spinlock_t *ptl;
  
-       pte = pte_alloc_map(mm, pmd, addr);
+       pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
         if (!pte)
                 return -ENOMEM;
         do {
                 BUG_ON(!pte_none(*pte));
-               if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-                       set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+               set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
                 pfn++;
         } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap(pte - 1);
+       pte_unmap_unlock(pte - 1, ptl);
         return 0;
  }
  
@@ -1190,16 +1330,31 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
          * rest of the world about it:
          *   VM_IO tells people not to look at these pages
          *      (accesses can have side effects).
-        *   VM_RESERVED tells swapout not to try to touch
-        *      this region.
+        *   VM_RESERVED is specified all over the place, because
+        *      in 2.4 it kept swapout's vma scan off this vma; but
+        *      in 2.6 the LRU scan won't even find its pages, so this
+        *      flag means no more than count its pages in reserved_vm,
+        *      and omit it from core dump, even when VM_IO turned off.
+        *   VM_PFNMAP tells the core MM that the base pages are just
+        *      raw PFN mappings, and do not have a "struct page" associated
+        *      with them.
+        *
+        * There's a horrible special case to handle copy-on-write
+        * behaviour that some programs depend on. We mark the "original"
+        * un-COW'ed pages by matching them up with "vma->vm_pgoff".
          */
-       vma->vm_flags |= VM_IO | VM_RESERVED;
+       if (is_cow_mapping(vma->vm_flags)) {
+               if (addr != vma->vm_start || end != vma->vm_end)
+                       return -EINVAL;
+               vma->vm_pgoff = pfn;
+       }
+
+       vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
  
         BUG_ON(addr >= end);
         pfn -= addr >> PAGE_SHIFT;
         pgd = pgd_offset(mm, addr);
         flush_cache_range(vma, addr, end);
-       spin_lock(&mm->page_table_lock);
         do {
                 next = pgd_addr_end(addr, end);
                 err = remap_pud_range(mm, pgd, addr, next,
@@ -1207,7 +1362,6 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
-       spin_unlock(&mm->page_table_lock);
         return err;
  }
  EXPORT_SYMBOL(remap_pfn_range);
@@ -1224,7 +1378,7 @@ static inline int generic_pte_range(struct mm_struct *mm,
          struct page *pte_page;
  
          pte = (mm == &init_mm) ?
-                pte_alloc_kernel(mm, pmd, addr) :
+                pte_alloc_kernel(pmd, addr) :
                  pte_alloc_map(mm, pmd, addr);
          if (!pte)
                  return -ENOMEM;
@@ -1300,18 +1454,41 @@ int generic_page_range(struct mm_struct *mm, unsigned long addr,
  
         BUG_ON(addr >= end);
         pgd = pgd_offset(mm, addr);
-       spin_lock(&mm->page_table_lock);
         do {
                 next = pgd_addr_end(addr, end);
                 err = generic_pud_range(mm, pgd, addr, next, fn, data);
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
-       spin_unlock(&mm->page_table_lock);
         return err;
  }
  #endif
  
+/*
+ * handle_pte_fault chooses page fault handler according to an entry
+ * which was read non-atomically.  Before making any commitment, on
+ * those architectures or configurations (e.g. i386 with PAE) which
+ * might give a mix of unmatched parts, do_swap_page and do_file_page
+ * must check under lock before unmapping the pte and proceeding
+ * (but do_wp_page is only called after already making such a check;
+ * and do_anonymous_page and do_no_page can safely check later on).
+ */
+static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
+                               pte_t *page_table, pte_t orig_pte)
+{
+       int same = 1;
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+       if (sizeof(pte_t) > sizeof(unsigned long)) {
+               spinlock_t *ptl = pte_lockptr(mm, pmd);
+               spin_lock(ptl);
+               same = pte_same(*page_table, orig_pte);
+               spin_unlock(ptl);
+       }
+#endif
+       pte_unmap(page_table);
+       return same;
+}
+
  /*
   * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
   * servicing faults for write access.  In the normal case, do always want
@@ -1325,19 +1502,31 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
         return pte;
  }
  
-/*
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
- */
-static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
-               pte_t *page_table)
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
  {
-       pte_t entry;
+       /*
+        * If the source page was a PFN mapping, we don't have
+        * a "struct page" for it. We do a best-effort copy by
+        * just copying from the original user address. If that
+        * fails, we just zero-fill it. Live with it.
+        */
+       if (unlikely(!src)) {
+               void *kaddr = kmap_atomic(dst, KM_USER0);
+               void __user *uaddr = (void __user *)(va & PAGE_MASK);
  
-       entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
-                             vma);
-       ptep_establish(vma, address, page_table, entry);
-       update_mmu_cache(vma, address, entry);
-       lazy_mmu_prot_update(entry);
+               /*
+                * This really shouldn't fail, because the page is there
+                * in the page tables. But it might just be unreadable,
+                * in which case we just give up and fill the result with
+                * zeroes.
+                */
+               if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+                       memset(kaddr, 0, PAGE_SIZE);
+               kunmap_atomic(kaddr, KM_USER0);
+               return;
+               
+       }
+       copy_user_highpage(dst, src, va);
  }
  
  /*
@@ -1345,9 +1534,6 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page
   * to a shared page. It is done by copying the page to a new address
   * and decrementing the shared-page counter for the old page.
   *
- * Goto-purists beware: the only reason for goto's here is that it results
- * in better assembly code.. The "default" path will see no jumps at all.
- *
   * Note that this routine assumes that the protection checks have been
   * done by the caller (the low-level page fault routine in most cases).
   * Thus we can safely just mark it writable once we've done any necessary
@@ -1357,105 +1543,76 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page
   * change only once the write actually happens. This avoids a few races,
   * and potentially makes it more efficient.
   *
- * We hold the mm semaphore and the page_table_lock on entry and exit
- * with the page_table_lock released.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), with pte both mapped and locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
   */
-static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
-       unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               spinlock_t *ptl, pte_t orig_pte)
  {
         struct page *old_page, *new_page;
-       unsigned long pfn = pte_pfn(pte);
         pte_t entry;
-       int ret;
-#ifdef CONFIG_XEN
-       struct page invalid_page;
-#endif
+       int ret = VM_FAULT_MINOR;
  
-       if (unlikely(!pfn_valid(pfn))) {
-#ifdef CONFIG_XEN
-               /* This can happen with /dev/mem (PROT_WRITE, MAP_PRIVATE). */
-               invalid_page.flags = (1<<PG_reserved) | (1<<PG_locked);
-               old_page = &invalid_page;
-       } else {
-               old_page = pfn_to_page(pfn);
-#else
-               /*
-                * This should really halt the system so it can be debugged or
-                * at least the kernel stops what it's doing before it corrupts
-                * data, but for the moment just pretend this is OOM.
-                */
-               pte_unmap(page_table);
-               printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
-                               address);
-               spin_unlock(&mm->page_table_lock);
-               return VM_FAULT_OOM;
-#endif
-       }
-#ifndef CONFIG_XEN
-       old_page = pfn_to_page(pfn);
-#endif
+       old_page = vm_normal_page(vma, address, orig_pte);
+       if (!old_page)
+               goto gotten;
  
         if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
                 int reuse = can_share_swap_page(old_page);
                 unlock_page(old_page);
                 if (reuse) {
-                       flush_cache_page(vma, address, pfn);
-                       entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
-                                             vma);
+                       flush_cache_page(vma, address, pte_pfn(orig_pte));
+                       entry = pte_mkyoung(orig_pte);
+                       entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                         ptep_set_access_flags(vma, address, page_table, entry, 1);
                         update_mmu_cache(vma, address, entry);
                         lazy_mmu_prot_update(entry);
-                       pte_unmap(page_table);
-                       spin_unlock(&mm->page_table_lock);
-                       return VM_FAULT_MINOR|VM_FAULT_WRITE;
+                       ret |= VM_FAULT_WRITE;
+                       goto unlock;
                 }
         }
-       pte_unmap(page_table);
  
         /*
          * Ok, we need to copy. Oh, well..
          */
-       if (!PageReserved(old_page))
-               page_cache_get(old_page);
-       spin_unlock(&mm->page_table_lock);
+       page_cache_get(old_page);
+gotten:
+       pte_unmap_unlock(page_table, ptl);
  
         if (unlikely(anon_vma_prepare(vma)))
-               goto no_new_page;
+               goto oom;
         if (old_page == ZERO_PAGE(address)) {
                 new_page = alloc_zeroed_user_highpage(vma, address);
                 if (!new_page)
-                       goto no_new_page;
+                       goto oom;
         } else {
                 new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
                 if (!new_page)
-                       goto no_new_page;
-#ifndef CONFIG_XEN
-               copy_user_highpage(new_page, old_page, address);
-#else
-               if (old_page == &invalid_page) {
-                       char *vto = kmap_atomic(new_page, KM_USER1);
-                       copy_page(vto, (void *)(address & PAGE_MASK));
-                       kunmap_atomic(vto, KM_USER1);
-               } else {
-                       copy_user_highpage(new_page, old_page, address);
-               }
-#endif
+                       goto oom;
+               cow_user_page(new_page, old_page, address);
         }
+
         /*
          * Re-check the pte - we dropped the lock
          */
-       ret = VM_FAULT_MINOR;
-       spin_lock(&mm->page_table_lock);
-       page_table = pte_offset_map(pmd, address);
-       if (likely(pte_same(*page_table, pte))) {
-               if (PageAnon(old_page))
-                       dec_mm_counter(mm, anon_rss);
-               if (PageReserved(old_page))
-                       inc_mm_counter(mm, rss);
-               else
+       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+       if (likely(pte_same(*page_table, orig_pte))) {
+               if (old_page) {
                         page_remove_rmap(old_page);
-               flush_cache_page(vma, address, pfn);
-               break_cow(vma, new_page, address, page_table);
+                       if (!PageAnon(old_page)) {
+                               dec_mm_counter(mm, file_rss);
+                               inc_mm_counter(mm, anon_rss);
+                       }
+               } else
+                       inc_mm_counter(mm, anon_rss);
+               flush_cache_page(vma, address, pte_pfn(orig_pte));
+               entry = mk_pte(new_page, vma->vm_page_prot);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+               ptep_establish(vma, address, page_table, entry);
+               update_mmu_cache(vma, address, entry);
+               lazy_mmu_prot_update(entry);
                 lru_cache_add_active(new_page);
                 page_add_anon_rmap(new_page, vma, address);
  
@@ -1463,14 +1620,16 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
                 new_page = old_page;
                 ret |= VM_FAULT_WRITE;
         }
-       pte_unmap(page_table);
-       page_cache_release(new_page);
-       page_cache_release(old_page);
-       spin_unlock(&mm->page_table_lock);
+       if (new_page)
+               page_cache_release(new_page);
+       if (old_page)
+               page_cache_release(old_page);
+unlock:
+       pte_unmap_unlock(page_table, ptl);
         return ret;
-
-no_new_page:
-       page_cache_release(old_page);
+oom:
+       if (old_page)
+               page_cache_release(old_page);
         return VM_FAULT_OOM;
  }
  
@@ -1539,13 +1698,6 @@ again:
  
         restart_addr = zap_page_range(vma, start_addr,
                                         end_addr - start_addr, details);
-
-       /*
-        * We cannot rely on the break test in unmap_vmas:
-        * on the one hand, we don't want to restart our loop
-        * just because that broke out for the page_table_lock;
-        * on the other hand, it does no test when vma is small.
-        */
         need_break = need_resched() ||
                         need_lockbreak(details->i_mmap_lock);
  
@@ -1794,38 +1946,37 @@ void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struc
  }
  
  /*
- * We hold the mm semaphore and the page_table_lock on entry and
- * should release the pagetable lock on exit..
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
   */
-static int do_swap_page(struct mm_struct * mm,
-       struct vm_area_struct * vma, unsigned long address,
-       pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
+static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access, pte_t orig_pte)
  {
+       spinlock_t *ptl;
         struct page *page;
-       swp_entry_t entry = pte_to_swp_entry(orig_pte);
+       swp_entry_t entry;
         pte_t pte;
         int ret = VM_FAULT_MINOR;
  
-       pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+       if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+               goto out;
+
+       entry = pte_to_swp_entry(orig_pte);
         page = lookup_swap_cache(entry);
         if (!page) {
                 swapin_readahead(entry, address, vma);
                 page = read_swap_cache_async(entry, vma, address);
                 if (!page) {
                         /*
-                        * Back out if somebody else faulted in this pte while
-                        * we released the page table lock.
+                        * Back out if somebody else faulted in this pte
+                        * while we released the pte lock.
                          */
-                       spin_lock(&mm->page_table_lock);
-                       page_table = pte_offset_map(pmd, address);
+                       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
                         if (likely(pte_same(*page_table, orig_pte)))
                                 ret = VM_FAULT_OOM;
-                       else
-                               ret = VM_FAULT_MINOR;
-                       pte_unmap(page_table);
-                       spin_unlock(&mm->page_table_lock);
-                       goto out;
+                       goto unlock;
                 }
  
                 /* Had to read the page from swap area: Major fault */
@@ -1838,15 +1989,11 @@ static int do_swap_page(struct mm_struct * mm,
         lock_page(page);
  
         /*
-        * Back out if somebody else faulted in this pte while we
-        * released the page table lock.
+        * Back out if somebody else already faulted in this pte.
          */
-       spin_lock(&mm->page_table_lock);
-       page_table = pte_offset_map(pmd, address);
-       if (unlikely(!pte_same(*page_table, orig_pte))) {
-               ret = VM_FAULT_MINOR;
+       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+       if (unlikely(!pte_same(*page_table, orig_pte)))
                 goto out_nomap;
-       }
  
         if (unlikely(!PageUptodate(page))) {
                 ret = VM_FAULT_SIGBUS;
@@ -1855,7 +2002,7 @@ static int do_swap_page(struct mm_struct * mm,
  
         /* The page isn't present yet, go ahead with the fault. */
  
-       inc_mm_counter(mm, rss);
+       inc_mm_counter(mm, anon_rss);
         pte = mk_pte(page, vma->vm_page_prot);
         if (write_access && can_share_swap_page(page)) {
                 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1873,7 +2020,7 @@ static int do_swap_page(struct mm_struct * mm,
  
         if (write_access) {
                 if (do_wp_page(mm, vma, address,
-                               page_table, pmd, pte) == VM_FAULT_OOM)
+                               page_table, pmd, ptl, pte) == VM_FAULT_OOM)
                         ret = VM_FAULT_OOM;
                 goto out;
         }
@@ -1881,74 +2028,76 @@ static int do_swap_page(struct mm_struct * mm,
         /* No need to invalidate - it was non-present before */
         update_mmu_cache(vma, address, pte);
         lazy_mmu_prot_update(pte);
-       pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+unlock:
+       pte_unmap_unlock(page_table, ptl);
  out:
         return ret;
  out_nomap:
-       pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+       pte_unmap_unlock(page_table, ptl);
         unlock_page(page);
         page_cache_release(page);
-       goto out;
+       return ret;
  }
  
  /*
- * We are called with the MM semaphore and page_table_lock
- * spinlock held to protect against concurrent faults in
- * multithreaded programs. 
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
   */
-static int
-do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
-               pte_t *page_table, pmd_t *pmd, int write_access,
-               unsigned long addr)
+static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access)
  {
+       struct page *page;
+       spinlock_t *ptl;
         pte_t entry;
-       struct page * page = ZERO_PAGE(addr);
-
-       /* Read-only mapping of ZERO_PAGE. */
-       entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
  
-       /* ..except if it's a write access */
         if (write_access) {
                 /* Allocate our own private page. */
                 pte_unmap(page_table);
-               spin_unlock(&mm->page_table_lock);
  
                 if (unlikely(anon_vma_prepare(vma)))
-                       goto no_mem;
-               page = alloc_zeroed_user_highpage(vma, addr);
+                       goto oom;
+               page = alloc_zeroed_user_highpage(vma, address);
                 if (!page)
-                       goto no_mem;
+                       goto oom;
  
-               spin_lock(&mm->page_table_lock);
-               page_table = pte_offset_map(pmd, addr);
+               entry = mk_pte(page, vma->vm_page_prot);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
  
-               if (!pte_none(*page_table)) {
-                       pte_unmap(page_table);
-                       page_cache_release(page);
-                       spin_unlock(&mm->page_table_lock);
-                       goto out;
-               }
-               inc_mm_counter(mm, rss);
-               entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
-                                                        vma->vm_page_prot)),
-                                     vma);
+               page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+               if (!pte_none(*page_table))
+                       goto release;
+               inc_mm_counter(mm, anon_rss);
                 lru_cache_add_active(page);
                 SetPageReferenced(page);
-               page_add_anon_rmap(page, vma, addr);
+               page_add_anon_rmap(page, vma, address);
+       } else {
+               /* Map the ZERO_PAGE - vm_page_prot is readonly */
+               page = ZERO_PAGE(address);
+               page_cache_get(page);
+               entry = mk_pte(page, vma->vm_page_prot);
+
+               ptl = pte_lockptr(mm, pmd);
+               spin_lock(ptl);
+               if (!pte_none(*page_table))
+                       goto release;
+               inc_mm_counter(mm, file_rss);
+               page_add_file_rmap(page);
         }
  
-       set_pte_at(mm, addr, page_table, entry);
-       pte_unmap(page_table);
+       set_pte_at(mm, address, page_table, entry);
  
         /* No need to invalidate - it was non-present before */
-       update_mmu_cache(vma, addr, entry);
+       update_mmu_cache(vma, address, entry);
         lazy_mmu_prot_update(entry);
-       spin_unlock(&mm->page_table_lock);
-out:
+unlock:
+       pte_unmap_unlock(page_table, ptl);
         return VM_FAULT_MINOR;
-no_mem:
+release:
+       page_cache_release(page);
+       goto unlock;
+oom:
         return VM_FAULT_OOM;
  }
  
@@ -1961,25 +2110,24 @@ no_mem:
   * As this is called only for pages that do not currently exist, we
   * do not need to flush old virtual caches or the TLB.
   *
- * This is called with the MM semaphore held and the page table
- * spinlock held. Exit with the spinlock released.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
   */
-static int
-do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
-       unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
+static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access)
  {
-       struct page * new_page;
+       spinlock_t *ptl;
+       struct page *new_page;
         struct address_space *mapping = NULL;
         pte_t entry;
         unsigned int sequence = 0;
         int ret = VM_FAULT_MINOR;
         int anon = 0;
  
-       if (!vma->vm_ops || !vma->vm_ops->nopage)
-               return do_anonymous_page(mm, vma, page_table,
-                                       pmd, write_access, address);
         pte_unmap(page_table);
-       spin_unlock(&mm->page_table_lock);
+       BUG_ON(vma->vm_flags & VM_PFNMAP);
  
         if (vma->vm_file) {
                 mapping = vma->vm_file->f_mapping;
@@ -1987,7 +2135,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 smp_rmb(); /* serializes i_size against truncate_count */
         }
  retry:
-       cond_resched();
         new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
         /*
          * No smp_rmb is needed here as long as there's a full
@@ -2020,19 +2167,20 @@ retry:
                 anon = 1;
         }
  
-       spin_lock(&mm->page_table_lock);
+       page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
         /*
          * For a file-backed vma, someone could have truncated or otherwise
          * invalidated this page.  If unmap_mapping_range got called,
          * retry getting the page.
          */
         if (mapping && unlikely(sequence != mapping->truncate_count)) {
-               sequence = mapping->truncate_count;
-               spin_unlock(&mm->page_table_lock);
+               pte_unmap_unlock(page_table, ptl);
                 page_cache_release(new_page);
+               cond_resched();
+               sequence = mapping->truncate_count;
+               smp_rmb();
                 goto retry;
         }
-       page_table = pte_offset_map(pmd, address);
  
         /*
          * This silly early PAGE_DIRTY setting removes a race
@@ -2046,68 +2194,67 @@ retry:
          */
         /* Only go through if we didn't race with anybody else... */
         if (pte_none(*page_table)) {
-               if (!PageReserved(new_page))
-                       inc_mm_counter(mm, rss);
-
                 flush_icache_page(vma, new_page);
                 entry = mk_pte(new_page, vma->vm_page_prot);
                 if (write_access)
                         entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                 set_pte_at(mm, address, page_table, entry);
                 if (anon) {
+                       inc_mm_counter(mm, anon_rss);
                         lru_cache_add_active(new_page);
                         page_add_anon_rmap(new_page, vma, address);
-               } else
+               } else {
+                       inc_mm_counter(mm, file_rss);
                         page_add_file_rmap(new_page);
-               pte_unmap(page_table);
+               }
         } else {
                 /* One of our sibling threads was faster, back out. */
-               pte_unmap(page_table);
                 page_cache_release(new_page);
-               spin_unlock(&mm->page_table_lock);
-               goto out;
+               goto unlock;
         }
  
         /* no need to invalidate: a not-present page shouldn't be cached */
         update_mmu_cache(vma, address, entry);
         lazy_mmu_prot_update(entry);
-       spin_unlock(&mm->page_table_lock);
-out:
+unlock:
+       pte_unmap_unlock(page_table, ptl);
         return ret;
  oom:
         page_cache_release(new_page);
-       ret = VM_FAULT_OOM;
-       goto out;
+       return VM_FAULT_OOM;
  }
  
  /*
   * Fault of a previously existing named mapping. Repopulate the pte
   * from the encoded file_pte if possible. This enables swappable
   * nonlinear vmas.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
   */
-static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
-       unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
+static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pte_t *page_table, pmd_t *pmd,
+               int write_access, pte_t orig_pte)
  {
-       unsigned long pgoff;
+       pgoff_t pgoff;
         int err;
  
-       BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
-       /*
-        * Fall back to the linear mapping if the fs does not support
-        * ->populate:
-        */
-       if (!vma->vm_ops->populate ||
-                       (write_access && !(vma->vm_flags & VM_SHARED))) {
-               pte_clear(mm, address, pte);
-               return do_no_page(mm, vma, address, write_access, pte, pmd);
-       }
-
-       pgoff = pte_to_pgoff(*pte);
+       if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+               return VM_FAULT_MINOR;
  
-       pte_unmap(pte);
-       spin_unlock(&mm->page_table_lock);
+       if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
+               /*
+                * Page table corrupted: show pte and kill process.
+                */
+               print_bad_pte(vma, orig_pte, address);
+               return VM_FAULT_OOM;
+       }
+       /* We can then assume vma->vm_ops && vma->vm_ops->populate */
  
-       err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
+       pgoff = pte_to_pgoff(orig_pte);
+       err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+                                       vma->vm_page_prot, pgoff, 0);
         if (err == -ENOMEM)
                 return VM_FAULT_OOM;
         if (err)
@@ -2124,56 +2271,68 @@ static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
   * with external mmu caches can use to update those (ie the Sparc or
   * PowerPC hashed page tables that act as extended TLBs).
   *
- * Note the "page_table_lock". It is to protect against kswapd removing
- * pages from under us. Note that kswapd only ever _removes_ pages, never
- * adds them. As such, once we have noticed that the page is not present,
- * we can drop the lock early.
- *
- * The adding of pages is protected by the MM semaphore (which we hold),
- * so we don't need to worry about a page being suddenly been added into
- * our VM.
- *
- * We enter with the pagetable spinlock held, we are supposed to
- * release it when done.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
   */
  static inline int handle_pte_fault(struct mm_struct *mm,
-       struct vm_area_struct * vma, unsigned long address,
-       int write_access, pte_t *pte, pmd_t *pmd)
+               struct vm_area_struct *vma, unsigned long address,
+               pte_t *pte, pmd_t *pmd, int write_access)
  {
         pte_t entry;
+       pte_t old_entry;
+       spinlock_t *ptl;
  
-       entry = *pte;
+       old_entry = entry = *pte;
         if (!pte_present(entry)) {
-               /*
-                * If it truly wasn't present, we know that kswapd
-                * and the PTE updates will not touch it later. So
-                * drop the lock.
-                */
-               if (pte_none(entry))
-                       return do_no_page(mm, vma, address, write_access, pte, pmd);
+               if (pte_none(entry)) {
+                       if (!vma->vm_ops || !vma->vm_ops->nopage)
+                               return do_anonymous_page(mm, vma, address,
+                                       pte, pmd, write_access);
+                       return do_no_page(mm, vma, address,
+                                       pte, pmd, write_access);
+               }
                 if (pte_file(entry))
-                       return do_file_page(mm, vma, address, write_access, pte, pmd);
-               return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
+                       return do_file_page(mm, vma, address,
+                                       pte, pmd, write_access, entry);
+               return do_swap_page(mm, vma, address,
+                                       pte, pmd, write_access, entry);
         }
  
+       ptl = pte_lockptr(mm, pmd);
+       spin_lock(ptl);
+       if (unlikely(!pte_same(*pte, entry)))
+               goto unlock;
         if (write_access) {
                 if (!pte_write(entry))
-                       return do_wp_page(mm, vma, address, pte, pmd, entry);
+                       return do_wp_page(mm, vma, address,
+                                       pte, pmd, ptl, entry);
                 entry = pte_mkdirty(entry);
         }
         entry = pte_mkyoung(entry);
-       ptep_set_access_flags(vma, address, pte, entry, write_access);
-       update_mmu_cache(vma, address, entry);
-       lazy_mmu_prot_update(entry);
-       pte_unmap(pte);
-       spin_unlock(&mm->page_table_lock);
+       if (!pte_same(old_entry, entry)) {
+               ptep_set_access_flags(vma, address, pte, entry, write_access);
+               update_mmu_cache(vma, address, entry);
+               lazy_mmu_prot_update(entry);
+       } else {
+               /*
+                * This is needed only for protection faults but the arch code
+                * is not yet telling us if this is a protection fault or not.
+                * This still avoids useless tlb flushes for .text page faults
+                * with threads.
+                */
+               if (write_access)
+                       flush_tlb_page(vma, address);
+       }
+unlock:
+       pte_unmap_unlock(pte, ptl);
         return VM_FAULT_MINOR;
  }
  
  /*
   * By the time we get here, we already hold the mm semaphore
   */
-int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 unsigned long address, int write_access)
  {
         pgd_t *pgd;
@@ -2188,100 +2347,78 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
         if (unlikely(is_vm_hugetlb_page(vma)))
                 return hugetlb_fault(mm, vma, address, write_access);
  
-       /*
-        * We need the page table lock to synchronize with kswapd
-        * and the SMP-safe atomic PTE updates.
-        */
         pgd = pgd_offset(mm, address);
-       spin_lock(&mm->page_table_lock);
-
         pud = pud_alloc(mm, pgd, address);
         if (!pud)
-               goto oom;
-
+               return VM_FAULT_OOM;
         pmd = pmd_alloc(mm, pud, address);
         if (!pmd)
-               goto oom;
-
+               return VM_FAULT_OOM;
         pte = pte_alloc_map(mm, pmd, address);
         if (!pte)
-               goto oom;
-       
-       return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+               return VM_FAULT_OOM;
  
- oom:
-       spin_unlock(&mm->page_table_lock);
-       return VM_FAULT_OOM;
+       return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
  }
  
  #ifndef __PAGETABLE_PUD_FOLDED
  /*
   * Allocate page upper directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
+ * We've already handled the fast-path in-line.
   */
-pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
  {
-       pud_t *new;
-
-       spin_unlock(&mm->page_table_lock);
-       new = pud_alloc_one(mm, address);
-       spin_lock(&mm->page_table_lock);
+       pud_t *new = pud_alloc_one(mm, address);
         if (!new)
-               return NULL;
+               return -ENOMEM;
  
-       /*
-        * Because we dropped the lock, we should re-check the
-        * entry, as somebody else could have populated it..
-        */
-       if (pgd_present(*pgd)) {
+       spin_lock(&mm->page_table_lock);
+       if (pgd_present(*pgd))          /* Another has populated it */
                 pud_free(new);
-               goto out;
-       }
-       pgd_populate(mm, pgd, new);
- out:
-       return pud_offset(pgd, address);
+       else
+               pgd_populate(mm, pgd, new);
+       spin_unlock(&mm->page_table_lock);
+       return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+       return 0;
  }
  #endif /* __PAGETABLE_PUD_FOLDED */
  
  #ifndef __PAGETABLE_PMD_FOLDED
  /*
   * Allocate page middle directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
+ * We've already handled the fast-path in-line.
   */
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
  {
-       pmd_t *new;
-
-       spin_unlock(&mm->page_table_lock);
-       new = pmd_alloc_one(mm, address);
-       spin_lock(&mm->page_table_lock);
+       pmd_t *new = pmd_alloc_one(mm, address);
         if (!new)
-               return NULL;
+               return -ENOMEM;
  
-       /*
-        * Because we dropped the lock, we should re-check the
-        * entry, as somebody else could have populated it..
-        */
+       spin_lock(&mm->page_table_lock);
  #ifndef __ARCH_HAS_4LEVEL_HACK
-       if (pud_present(*pud)) {
+       if (pud_present(*pud))          /* Another has populated it */
                 pmd_free(new);
-               goto out;
-       }
-       pud_populate(mm, pud, new);
+       else
+               pud_populate(mm, pud, new);
  #else
-       if (pgd_present(*pud)) {
+       if (pgd_present(*pud))          /* Another has populated it */
                 pmd_free(new);
-               goto out;
-       }
-       pgd_populate(mm, pud, new);
+       else
+               pgd_populate(mm, pud, new);
  #endif /* __ARCH_HAS_4LEVEL_HACK */
-
- out:
-       return pmd_offset(pud, address);
+       spin_unlock(&mm->page_table_lock);
+       return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+       return 0;
  }
  #endif /* __PAGETABLE_PMD_FOLDED */
  
@@ -2346,22 +2483,6 @@ unsigned long vmalloc_to_pfn(void * vmalloc_addr)
  
  EXPORT_SYMBOL(vmalloc_to_pfn);
  
-/*
- * update_mem_hiwater
- *     - update per process rss and vm high water data
- */
-void update_mem_hiwater(struct task_struct *tsk)
-{
-       if (tsk->mm) {
-               unsigned long rss = get_mm_counter(tsk->mm, rss);
-
-               if (tsk->mm->hiwater_rss < rss)
-                       tsk->mm->hiwater_rss = rss;
-               if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
-                       tsk->mm->hiwater_vm = tsk->mm->total_vm;
-       }
-}
-
  #if !defined(__HAVE_ARCH_GATE_AREA)
  
  #if defined(AT_SYSINFO_EHDR)
diff --git a/linux-2.6-xen-sparse/mm/mmap.c b/linux-2.6-xen-sparse/mm/mmap.c

index 15e0eb9f88d010225c306c5a1ab4aa64c9573fd6..b9d53b5e79ad085c7867d99bf27b9a9c928b9606 100644 (file)
--- a/linux-2.6-xen-sparse/mm/mmap.c
+++ b/linux-2.6-xen-sparse/mm/mmap.c
@@ -155,10 +155,6 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
         return -ENOMEM;
  }
  
-EXPORT_SYMBOL(sysctl_overcommit_memory);
-EXPORT_SYMBOL(sysctl_overcommit_ratio);
-EXPORT_SYMBOL(sysctl_max_map_count);
-EXPORT_SYMBOL(vm_committed_space);
  EXPORT_SYMBOL(__vm_enough_memory);
  
  /*
@@ -181,26 +177,36 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
  }
  
  /*
- * Remove one vm structure and free it.
+ * Unlink a file-based vm structure from its prio_tree, to hide
+ * vma from rmap and vmtruncate before freeing its page tables.
   */
-static void remove_vm_struct(struct vm_area_struct *vma)
+void unlink_file_vma(struct vm_area_struct *vma)
  {
         struct file *file = vma->vm_file;
  
-       might_sleep();
         if (file) {
                 struct address_space *mapping = file->f_mapping;
                 spin_lock(&mapping->i_mmap_lock);
                 __remove_shared_vm_struct(vma, file, mapping);
                 spin_unlock(&mapping->i_mmap_lock);
         }
+}
+
+/*
+ * Close a vm structure and free it, returning the next.
+ */
+static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+{
+       struct vm_area_struct *next = vma->vm_next;
+
+       might_sleep();
         if (vma->vm_ops && vma->vm_ops->close)
                 vma->vm_ops->close(vma);
-       if (file)
-               fput(file);
-       anon_vma_unlink(vma);
+       if (vma->vm_file)
+               fput(vma->vm_file);
         mpol_free(vma_policy(vma));
         kmem_cache_free(vm_area_cachep, vma);
+       return next;
  }
  
  asmlinkage unsigned long sys_brk(unsigned long brk)
@@ -605,7 +611,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
   * If the vma has a ->close operation then the driver probably needs to release
   * per-vma resources, so we don't attempt to merge those.
   */
-#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
+#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
  
  static inline int is_mergeable_vma(struct vm_area_struct *vma,
                         struct file *file, unsigned long vm_flags)
@@ -832,7 +838,7 @@ none:
  }
  
  #ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
+void vm_stat_account(struct mm_struct *mm, unsigned long flags,
                                                 struct file *file, long pages)
  {
         const unsigned long stack_flags
@@ -1110,7 +1116,7 @@ munmap_back:
         }
  out:   
         mm->total_vm += len >> PAGE_SHIFT;
-       __vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+       vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
         if (vm_flags & VM_LOCKED) {
                 mm->locked_vm += len >> PAGE_SHIFT;
                 make_pages_present(addr, addr + len);
@@ -1475,15 +1481,19 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
         mm->total_vm += grow;
         if (vma->vm_flags & VM_LOCKED)
                 mm->locked_vm += grow;
-       __vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+       vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
         return 0;
  }
  
-#ifdef CONFIG_STACK_GROWSUP
+#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
  /*
- * vma is the first one with address > vma->vm_end.  Have to extend vma.
+ * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+ * vma is the last one with address > vma->vm_end.  Have to extend vma.
   */
-int expand_stack(struct vm_area_struct * vma, unsigned long address)
+#ifndef CONFIG_IA64
+static inline
+#endif
+int expand_upwards(struct vm_area_struct *vma, unsigned long address)
  {
         int error;
  
@@ -1521,6 +1531,13 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address)
         anon_vma_unlock(vma);
         return error;
  }
+#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
+
+#ifdef CONFIG_STACK_GROWSUP
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+       return expand_upwards(vma, address);
+}
  
  struct vm_area_struct *
  find_extend_vma(struct mm_struct *mm, unsigned long addr)
@@ -1603,36 +1620,24 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
  }
  #endif
  
-/* Normal function to fix up a mapping
- * This function is the default for when an area has no specific
- * function.  This may be used as part of a more specific routine.
- *
- * By the time this function is called, the area struct has been
- * removed from the process mapping list.
- */
-static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
-{
-       size_t len = area->vm_end - area->vm_start;
-
-       area->vm_mm->total_vm -= len >> PAGE_SHIFT;
-       if (area->vm_flags & VM_LOCKED)
-               area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
-       vm_stat_unaccount(area);
-       remove_vm_struct(area);
-}
-
  /*
- * Update the VMA and inode share lists.
- *
- * Ok - we have the memory areas we should free on the 'free' list,
+ * Ok - we have the memory areas we should free on the vma list,
   * so release them, and do the vma updates.
+ *
+ * Called with the mm semaphore held.
   */
-static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
  {
+       /* Update high watermark before we lower total_vm */
+       update_hiwater_vm(mm);
         do {
-               struct vm_area_struct *next = vma->vm_next;
-               unmap_vma(mm, vma);
-               vma = next;
+               long nrpages = vma_pages(vma);
+
+               mm->total_vm -= nrpages;
+               if (vma->vm_flags & VM_LOCKED)
+                       mm->locked_vm -= nrpages;
+               vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
+               vma = remove_vma(vma);
         } while (vma);
         validate_mm(mm);
  }
@@ -1651,14 +1656,13 @@ static void unmap_region(struct mm_struct *mm,
         unsigned long nr_accounted = 0;
  
         lru_add_drain();
-       spin_lock(&mm->page_table_lock);
         tlb = tlb_gather_mmu(mm, 0);
-       unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
+       update_hiwater_rss(mm);
+       unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
         free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
                                  next? next->vm_start: 0);
         tlb_finish_mmu(tlb, start, end);
-       spin_unlock(&mm->page_table_lock);
  }
  
  /*
@@ -1799,7 +1803,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
         unmap_region(mm, vma, prev, start, end);
  
         /* Fix up all other VM information */
-       unmap_vma_list(mm, vma);
+       remove_vma_list(mm, vma);
  
         return 0;
  }
@@ -1821,7 +1825,7 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len)
  
  static inline void verify_mm_writelocked(struct mm_struct *mm)
  {
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_VM
         if (unlikely(down_read_trylock(&mm->mmap_sem))) {
                 WARN_ON(1);
                 up_read(&mm->mmap_sem);
@@ -1937,34 +1941,21 @@ void exit_mmap(struct mm_struct *mm)
  #endif
  
         lru_add_drain();
-
-       spin_lock(&mm->page_table_lock);
-
         flush_cache_mm(mm);
         tlb = tlb_gather_mmu(mm, 1);
+       /* Don't update_hiwater_rss(mm) here, do_exit already did */
         /* Use -1 here to ensure all VMAs in the mm are unmapped */
-       end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
+       end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
         free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
         tlb_finish_mmu(tlb, 0, end);
  
-       mm->mmap = mm->mmap_cache = NULL;
-       mm->mm_rb = RB_ROOT;
-       set_mm_counter(mm, rss, 0);
-       mm->total_vm = 0;
-       mm->locked_vm = 0;
-
-       spin_unlock(&mm->page_table_lock);
-
         /*
-        * Walk the list again, actually closing and freeing it
-        * without holding any MM locks.
+        * Walk the list again, actually closing and freeing it,
+        * with preemption enabled, without holding any MM locks.
          */
-       while (vma) {
-               struct vm_area_struct *next = vma->vm_next;
-               remove_vm_struct(vma);
-               vma = next;
-       }
+       while (vma)
+               vma = remove_vma(vma);
  
         BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
  }
diff --git a/linux-2.6-xen-sparse/mm/page_alloc.c b/linux-2.6-xen-sparse/mm/page_alloc.c

index 9b94f63b1a09e54144c1ab9591762e71a1b16418..5a9daa2fff06bd584c200cf9cd1dde441cfd4650 100644 (file)
--- a/linux-2.6-xen-sparse/mm/page_alloc.c
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c
@@ -33,6 +33,7 @@
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
+#include <linux/memory_hotplug.h>
  #include <linux/nodemask.h>
  #include <linux/vmalloc.h>
  
@@ -59,11 +60,13 @@ long nr_swap_pages;
   *     NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
   *     HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
   *     HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
+ *
+ * TBD: should special case ZONE_DMA32 machines here - in those we normally
+ * don't need any ZONE_NORMAL reservation
   */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
  
  EXPORT_SYMBOL(totalram_pages);
-EXPORT_SYMBOL(nr_swap_pages);
  
  /*
   * Used by page_zone() to look up the address of the struct zone whose
@@ -72,27 +75,50 @@ EXPORT_SYMBOL(nr_swap_pages);
  struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
  EXPORT_SYMBOL(zone_table);
  
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
  int min_free_kbytes = 1024;
  
  unsigned long __initdata nr_kernel_pages;
  unsigned long __initdata nr_all_pages;
  
+static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
+{
+       int ret = 0;
+       unsigned seq;
+       unsigned long pfn = page_to_pfn(page);
+
+       do {
+               seq = zone_span_seqbegin(zone);
+               if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
+                       ret = 1;
+               else if (pfn < zone->zone_start_pfn)
+                       ret = 1;
+       } while (zone_span_seqretry(zone, seq));
+
+       return ret;
+}
+
+static int page_is_consistent(struct zone *zone, struct page *page)
+{
+#ifdef CONFIG_HOLES_IN_ZONE
+       if (!pfn_valid(page_to_pfn(page)))
+               return 0;
+#endif
+       if (zone != page_zone(page))
+               return 0;
+
+       return 1;
+}
  /*
   * Temporary debugging check for pages not lying within a given zone.
   */
  static int bad_range(struct zone *zone, struct page *page)
  {
-       if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
-               return 1;
-       if (page_to_pfn(page) < zone->zone_start_pfn)
-               return 1;
-#ifdef CONFIG_HOLES_IN_ZONE
-       if (!pfn_valid(page_to_pfn(page)))
+       if (page_outside_zone_boundaries(zone, page))
                 return 1;
-#endif
-       if (zone != page_zone(page))
+       if (!page_is_consistent(zone, page))
                 return 1;
+
         return 0;
  }
  
@@ -101,7 +127,7 @@ static void bad_page(const char *function, struct page *page)
         printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
                 function, current->comm, page);
         printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-               (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+               (int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
                 page->mapping, page_mapcount(page), page_count(page));
         printk(KERN_EMERG "Backtrace:\n");
         dump_stack();
@@ -114,17 +140,13 @@ static void bad_page(const char *function, struct page *page)
                         1 << PG_reclaim |
                         1 << PG_slab    |
                         1 << PG_swapcache |
-                       1 << PG_writeback);
+                       1 << PG_writeback );
         set_page_count(page, 0);
         reset_page_mapcount(page);
         page->mapping = NULL;
         add_taint(TAINT_BAD_PAGE);
  }
  
-#ifndef CONFIG_HUGETLB_PAGE
-#define prep_compound_page(page, order) do { } while (0)
-#define destroy_compound_page(page, order) do { } while (0)
-#else
  /*
   * Higher-order pages are called "compound pages".  They are structured thusly:
   *
@@ -153,7 +175,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
                 struct page *p = page + i;
  
                 SetPageCompound(p);
-               p->private = (unsigned long)page;
+               set_page_private(p, (unsigned long)page);
         }
  }
  
@@ -173,12 +195,11 @@ static void destroy_compound_page(struct page *page, unsigned long order)
  
                 if (!PageCompound(p))
                         bad_page(__FUNCTION__, page);
-               if (p->private != (unsigned long)page)
+               if (page_private(p) != (unsigned long)page)
                         bad_page(__FUNCTION__, page);
                 ClearPageCompound(p);
         }
  }
-#endif         /* CONFIG_HUGETLB_PAGE */
  
  /*
   * function for dealing with page's order in buddy system.
@@ -186,18 +207,18 @@ static void destroy_compound_page(struct page *page, unsigned long order)
   * So, we don't need atomic page->flags operations here.
   */
  static inline unsigned long page_order(struct page *page) {
-       return page->private;
+       return page_private(page);
  }
  
  static inline void set_page_order(struct page *page, int order) {
-       page->private = order;
+       set_page_private(page, order);
         __SetPagePrivate(page);
  }
  
  static inline void rmv_page_order(struct page *page)
  {
         __ClearPagePrivate(page);
-       page->private = 0;
+       set_page_private(page, 0);
  }
  
  /*
@@ -237,14 +258,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
   * (a) the buddy is free &&
   * (b) the buddy is on the buddy system &&
   * (c) a page and its buddy have the same order.
- * for recording page's order, we use page->private and PG_private.
+ * for recording page's order, we use page_private(page) and PG_private.
   *
   */
  static inline int page_is_buddy(struct page *page, int order)
  {
         if (PagePrivate(page)           &&
             (page_order(page) == order) &&
-           !PageReserved(page)         &&
              page_count(page) == 0)
                 return 1;
         return 0;
@@ -264,7 +284,7 @@ static inline int page_is_buddy(struct page *page, int order)
   * parts of the VM system.
   * At each level, we keep a list of pages, which are heads of continuous
   * free pages of length of (1 << order) and marked with PG_Private.Page's
- * order is recorded in page->private field.
+ * order is recorded in page_private(page) field.
   * So when we are allocating or freeing one, we can derive the state of the
   * other.  That is, if we allocate a small block, and both were   
   * free, the remainder of the region must be split into blocks.   
@@ -314,7 +334,7 @@ static inline void __free_pages_bulk (struct page *page,
         zone->free_area[order].nr_free++;
  }
  
-static inline void free_pages_check(const char *function, struct page *page)
+static inline int free_pages_check(const char *function, struct page *page)
  {
         if (    page_mapcount(page) ||
                 page->mapping != NULL ||
@@ -327,10 +347,17 @@ static inline void free_pages_check(const char *function, struct page *page)
                         1 << PG_reclaim |
                         1 << PG_slab    |
                         1 << PG_swapcache |
-                       1 << PG_writeback )))
+                       1 << PG_writeback |
+                       1 << PG_reserved )))
                 bad_page(function, page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
+       /*
+        * For now, we report if PG_reserved was found set, but do not
+        * clear it, and do not free the page.  But we shall soon need
+        * to do more, for when the ZERO_PAGE count wraps negative.
+        */
+       return PageReserved(page);
  }
  
  /*
@@ -370,12 +397,11 @@ void __free_pages_ok(struct page *page, unsigned int order)
  {
         LIST_HEAD(list);
         int i;
+       int reserved = 0;
  
         if (arch_free_page(page, order))
                 return;
  
-       mod_page_state(pgfree, 1 << order);
-
  #ifndef CONFIG_MMU
         if (order > 0)
                 for (i = 1 ; i < (1 << order) ; ++i)
@@ -383,8 +409,12 @@ void __free_pages_ok(struct page *page, unsigned int order)
  #endif
  
         for (i = 0 ; i < (1 << order) ; ++i)
-               free_pages_check(__FUNCTION__, page + i);
+               reserved += free_pages_check(__FUNCTION__, page + i);
+       if (reserved)
+               return;
+
         list_add(&page->lru, &list);
+       mod_page_state(pgfree, 1 << order);
         kernel_map_pages(page, 1<<order, 0);
         free_pages_bulk(page_zone(page), 1, &list, order);
  }
@@ -442,7 +472,7 @@ void set_page_refs(struct page *page, int order)
  /*
   * This page is about to be returned from the page allocator
   */
-static void prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order)
  {
         if (    page_mapcount(page) ||
                 page->mapping != NULL ||
@@ -456,15 +486,24 @@ static void prep_new_page(struct page *page, int order)
                         1 << PG_reclaim |
                         1 << PG_slab    |
                         1 << PG_swapcache |
-                       1 << PG_writeback )))
+                       1 << PG_writeback |
+                       1 << PG_reserved )))
                 bad_page(__FUNCTION__, page);
  
+       /*
+        * For now, we report if PG_reserved was found set, but do not
+        * clear it, and do not allocate the page: as a safety net.
+        */
+       if (PageReserved(page))
+               return 1;
+
         page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
                         1 << PG_referenced | 1 << PG_arch_1 |
                         1 << PG_checked | 1 << PG_mappedtodisk);
-       page->private = 0;
+       set_page_private(page, 0);
         set_page_refs(page, order);
         kernel_map_pages(page, 1 << order, 1);
+       return 0;
  }
  
  /* 
@@ -648,11 +687,14 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
         if (arch_free_page(page, 0))
                 return;
  
-       kernel_map_pages(page, 1, 0);
-       inc_page_state(pgfree);
         if (PageAnon(page))
                 page->mapping = NULL;
-       free_pages_check(__FUNCTION__, page);
+       if (free_pages_check(__FUNCTION__, page))
+               return;
+
+       inc_page_state(pgfree);
+       kernel_map_pages(page, 1, 0);
+
         pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
         local_irq_save(flags);
         list_add(&page->lru, &pcp->list);
@@ -691,12 +733,14 @@ static struct page *
  buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
  {
         unsigned long flags;
-       struct page *page = NULL;
+       struct page *page;
         int cold = !!(gfp_flags & __GFP_COLD);
  
+again:
         if (order == 0) {
                 struct per_cpu_pages *pcp;
  
+               page = NULL;
                 pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
                 local_irq_save(flags);
                 if (pcp->count <= pcp->low)
@@ -709,9 +753,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
                 }
                 local_irq_restore(flags);
                 put_cpu();
-       }
-
-       if (page == NULL) {
+       } else {
                 spin_lock_irqsave(&zone->lock, flags);
                 page = __rmqueue(zone, order);
                 spin_unlock_irqrestore(&zone->lock, flags);
@@ -720,7 +762,8 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
         if (page != NULL) {
                 BUG_ON(bad_range(zone, page));
                 mod_page_state_zone(zone, pgalloc, 1 << order);
-               prep_new_page(page, order);
+               if (prep_new_page(page, order))
+                       goto again;
  
                 if (gfp_flags & __GFP_ZERO)
                         prep_zero_page(page, order, gfp_flags);
@@ -731,20 +774,28 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
         return page;
  }
  
+#define ALLOC_NO_WATERMARKS    0x01 /* don't check watermarks at all */
+#define ALLOC_WMARK_MIN                0x02 /* use pages_min watermark */
+#define ALLOC_WMARK_LOW                0x04 /* use pages_low watermark */
+#define ALLOC_WMARK_HIGH       0x08 /* use pages_high watermark */
+#define ALLOC_HARDER           0x10 /* try to alloc harder */
+#define ALLOC_HIGH             0x20 /* __GFP_HIGH set */
+#define ALLOC_CPUSET           0x40 /* check for correct cpuset */
+
  /*
   * Return 1 if free pages are above 'mark'. This takes into account the order
   * of the allocation.
   */
  int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-                     int classzone_idx, int can_try_harder, int gfp_high)
+                     int classzone_idx, int alloc_flags)
  {
         /* free_pages my go negative - that's OK */
         long min = mark, free_pages = z->free_pages - (1 << order) + 1;
         int o;
  
-       if (gfp_high)
+       if (alloc_flags & ALLOC_HIGH)
                 min -= min / 2;
-       if (can_try_harder)
+       if (alloc_flags & ALLOC_HARDER)
                 min -= min / 4;
  
         if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -762,14 +813,47 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
         return 1;
  }
  
-static inline int
-should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
+/*
+ * get_page_from_freelist goes through the zonelist trying to allocate
+ * a page.
+ */
+static struct page *
+get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist, int alloc_flags)
  {
-       if (!z->reclaim_pages)
-               return 0;
-       if (gfp_mask & __GFP_NORECLAIM)
-               return 0;
-       return 1;
+       struct zone **z = zonelist->zones;
+       struct page *page = NULL;
+       int classzone_idx = zone_idx(*z);
+
+       /*
+        * Go through the zonelist once, looking for a zone with enough free.
+        * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+        */
+       do {
+               if ((alloc_flags & ALLOC_CPUSET) &&
+                               !cpuset_zone_allowed(*z, gfp_mask))
+                       continue;
+
+               if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+                       unsigned long mark;
+                       if (alloc_flags & ALLOC_WMARK_MIN)
+                               mark = (*z)->pages_min;
+                       else if (alloc_flags & ALLOC_WMARK_LOW)
+                               mark = (*z)->pages_low;
+                       else
+                               mark = (*z)->pages_high;
+                       if (!zone_watermark_ok(*z, order, mark,
+                                   classzone_idx, alloc_flags))
+                               continue;
+               }
+
+               page = buffered_rmqueue(*z, order, gfp_mask);
+               if (page) {
+                       zone_statistics(zonelist, *z);
+                       break;
+               }
+       } while (*(++z) != NULL);
+       return page;
  }
  
  /*
@@ -779,106 +863,77 @@ struct page * fastcall
  __alloc_pages(gfp_t gfp_mask, unsigned int order,
                 struct zonelist *zonelist)
  {
-       const int wait = gfp_mask & __GFP_WAIT;
-       struct zone **zones, *z;
+       const gfp_t wait = gfp_mask & __GFP_WAIT;
+       struct zone **z;
         struct page *page;
         struct reclaim_state reclaim_state;
         struct task_struct *p = current;
-       int i;
-       int classzone_idx;
         int do_retry;
-       int can_try_harder;
+       int alloc_flags;
         int did_some_progress;
  
         might_sleep_if(wait);
  
-       /*
-        * The caller may dip into page reserves a bit more if the caller
-        * cannot run direct reclaim, or is the caller has realtime scheduling
-        * policy
-        */
-       can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
-
-       zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+restart:
+       z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
  
-       if (unlikely(zones[0] == NULL)) {
+       if (unlikely(*z == NULL)) {
                 /* Should this ever happen?? */
                 return NULL;
         }
  
-       classzone_idx = zone_idx(zones[0]);
+       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+                               zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+       if (page)
+               goto got_pg;
+
+       do {
+               wakeup_kswapd(*z, order);
+       } while (*(++z));
  
-restart:
         /*
-        * Go through the zonelist once, looking for a zone with enough free.
-        * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+        * OK, we're below the kswapd watermark and have kicked background
+        * reclaim. Now things get more complex, so set up alloc_flags according
+        * to how we want to proceed.
+        *
+        * The caller may dip into page reserves a bit more if the caller
+        * cannot run direct reclaim, or if the caller has realtime scheduling
+        * policy.
          */
-       for (i = 0; (z = zones[i]) != NULL; i++) {
-               int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
-               if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-                       continue;
-
-               /*
-                * If the zone is to attempt early page reclaim then this loop
-                * will try to reclaim pages and check the watermark a second
-                * time before giving up and falling back to the next zone.
-                */
-zone_reclaim_retry:
-               if (!zone_watermark_ok(z, order, z->pages_low,
-                                      classzone_idx, 0, 0)) {
-                       if (!do_reclaim)
-                               continue;
-                       else {
-                               zone_reclaim(z, gfp_mask, order);
-                               /* Only try reclaim once */
-                               do_reclaim = 0;
-                               goto zone_reclaim_retry;
-                       }
-               }
-
-               page = buffered_rmqueue(z, order, gfp_mask);
-               if (page)
-                       goto got_pg;
-       }
-
-       for (i = 0; (z = zones[i]) != NULL; i++)
-               wakeup_kswapd(z, order);
+       alloc_flags = ALLOC_WMARK_MIN;
+       if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
+               alloc_flags |= ALLOC_HARDER;
+       if (gfp_mask & __GFP_HIGH)
+               alloc_flags |= ALLOC_HIGH;
+       if (wait)
+               alloc_flags |= ALLOC_CPUSET;
  
         /*
          * Go through the zonelist again. Let __GFP_HIGH and allocations
-        * coming from realtime tasks to go deeper into reserves
+        * coming from realtime tasks go deeper into reserves.
          *
          * This is the last chance, in general, before the goto nopage.
          * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
          * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
          */
-       for (i = 0; (z = zones[i]) != NULL; i++) {
-               if (!zone_watermark_ok(z, order, z->pages_min,
-                                      classzone_idx, can_try_harder,
-                                      gfp_mask & __GFP_HIGH))
-                       continue;
-
-               if (wait && !cpuset_zone_allowed(z, gfp_mask))
-                       continue;
-
-               page = buffered_rmqueue(z, order, gfp_mask);
-               if (page)
-                       goto got_pg;
-       }
+       page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+       if (page)
+               goto got_pg;
  
         /* This allocation should allow future memory freeing. */
  
         if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
                         && !in_interrupt()) {
                 if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+nofail_alloc:
                         /* go through the zonelist yet again, ignoring mins */
-                       for (i = 0; (z = zones[i]) != NULL; i++) {
-                               if (!cpuset_zone_allowed(z, gfp_mask))
-                                       continue;
-                               page = buffered_rmqueue(z, order, gfp_mask);
-                               if (page)
-                                       goto got_pg;
+                       page = get_page_from_freelist(gfp_mask, order,
+                               zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+                       if (page)
+                               goto got_pg;
+                       if (gfp_mask & __GFP_NOFAIL) {
+                               blk_congestion_wait(WRITE, HZ/50);
+                               goto nofail_alloc;
                         }
                 }
                 goto nopage;
@@ -896,7 +951,7 @@ rebalance:
         reclaim_state.reclaimed_slab = 0;
         p->reclaim_state = &reclaim_state;
  
-       did_some_progress = try_to_free_pages(zones, gfp_mask);
+       did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
  
         p->reclaim_state = NULL;
         p->flags &= ~PF_MEMALLOC;
@@ -904,19 +959,10 @@ rebalance:
         cond_resched();
  
         if (likely(did_some_progress)) {
-               for (i = 0; (z = zones[i]) != NULL; i++) {
-                       if (!zone_watermark_ok(z, order, z->pages_min,
-                                              classzone_idx, can_try_harder,
-                                              gfp_mask & __GFP_HIGH))
-                               continue;
-
-                       if (!cpuset_zone_allowed(z, gfp_mask))
-                               continue;
-
-                       page = buffered_rmqueue(z, order, gfp_mask);
-                       if (page)
-                               goto got_pg;
-               }
+               page = get_page_from_freelist(gfp_mask, order,
+                                               zonelist, alloc_flags);
+               if (page)
+                       goto got_pg;
         } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                 /*
                  * Go through the zonelist yet one more time, keep
@@ -924,18 +970,10 @@ rebalance:
                  * a parallel oom killing, we must fail if we're still
                  * under heavy pressure.
                  */
-               for (i = 0; (z = zones[i]) != NULL; i++) {
-                       if (!zone_watermark_ok(z, order, z->pages_high,
-                                              classzone_idx, 0, 0))
-                               continue;
-
-                       if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-                               continue;
-
-                       page = buffered_rmqueue(z, order, gfp_mask);
-                       if (page)
-                               goto got_pg;
-               }
+               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+                               zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+               if (page)
+                       goto got_pg;
  
                 out_of_memory(gfp_mask, order);
                 goto restart;
@@ -968,9 +1006,7 @@ nopage:
                 dump_stack();
                 show_mem();
         }
-       return NULL;
  got_pg:
-       zone_statistics(zonelist, z);
         return page;
  }
  
@@ -998,7 +1034,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
          * get_zeroed_page() returns a 32-bit address, which cannot represent
          * a highmem page
          */
-       BUG_ON(gfp_mask & __GFP_HIGHMEM);
+       BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
  
         page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
         if (page)
@@ -1018,7 +1054,7 @@ void __pagevec_free(struct pagevec *pvec)
  
  fastcall void __free_pages(struct page *page, unsigned int order)
  {
-       if (!PageReserved(page) && put_page_testzero(page)) {
+       if (put_page_testzero(page)) {
                 if (order == 0)
                         free_hot_page(page);
                 else
@@ -1091,7 +1127,7 @@ static unsigned int nr_free_zone_pages(int offset)
   */
  unsigned int nr_free_buffer_pages(void)
  {
-       return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK);
+       return nr_free_zone_pages(gfp_zone(GFP_USER));
  }
  
  /*
@@ -1099,7 +1135,7 @@ unsigned int nr_free_buffer_pages(void)
   */
  unsigned int nr_free_pagecache_pages(void)
  {
-       return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+       return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
  }
  
  #ifdef CONFIG_HIGHMEM
@@ -1307,12 +1343,9 @@ void show_free_areas(void)
                 } else
                         printk("\n");
  
-               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               for_each_online_cpu(cpu) {
                         struct per_cpu_pageset *pageset;
  
-                       if (!cpu_possible(cpu))
-                               continue;
-
                         pageset = zone_pcp(zone, cpu);
  
                         for (temperature = 0; temperature < 2; temperature++)
@@ -1421,6 +1454,10 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
                 zone = pgdat->node_zones + ZONE_NORMAL;
                 if (zone->present_pages)
                         zonelist->zones[j++] = zone;
+       case ZONE_DMA32:
+               zone = pgdat->node_zones + ZONE_DMA32;
+               if (zone->present_pages)
+                       zonelist->zones[j++] = zone;
         case ZONE_DMA:
                 zone = pgdat->node_zones + ZONE_DMA;
                 if (zone->present_pages)
@@ -1430,6 +1467,18 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
         return j;
  }
  
+static inline int highest_zone(int zone_bits)
+{
+       int res = ZONE_NORMAL;
+       if (zone_bits & (__force int)__GFP_HIGHMEM)
+               res = ZONE_HIGHMEM;
+       if (zone_bits & (__force int)__GFP_DMA32)
+               res = ZONE_DMA32;
+       if (zone_bits & (__force int)__GFP_DMA)
+               res = ZONE_DMA;
+       return res;
+}
+
  #ifdef CONFIG_NUMA
  #define MAX_NODE_LOAD (num_online_nodes())
  static int __initdata node_load[MAX_NUMNODES];
@@ -1526,11 +1575,7 @@ static void __init build_zonelists(pg_data_t *pgdat)
                         zonelist = pgdat->node_zonelists + i;
                         for (j = 0; zonelist->zones[j] != NULL; j++);
  
-                       k = ZONE_NORMAL;
-                       if (i & __GFP_HIGHMEM)
-                               k = ZONE_HIGHMEM;
-                       if (i & __GFP_DMA)
-                               k = ZONE_DMA;
+                       k = highest_zone(i);
  
                         j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
                         zonelist->zones[j] = NULL;
@@ -1551,12 +1596,7 @@ static void __init build_zonelists(pg_data_t *pgdat)
                 zonelist = pgdat->node_zonelists + i;
  
                 j = 0;
-               k = ZONE_NORMAL;
-               if (i & __GFP_HIGHMEM)
-                       k = ZONE_HIGHMEM;
-               if (i & __GFP_DMA)
-                       k = ZONE_DMA;
-
+               k = highest_zone(i);
                 j = build_zonelists_node(pgdat, zonelist, j, k);
                 /*
                  * Now we build the zonelist so that it contains the zones
@@ -1661,7 +1701,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
   * up by free_all_bootmem() once the early boot process is
   * done. Non-atomic initialization, single-pass.
   */
-void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 unsigned long start_pfn)
  {
         struct page *page;
@@ -1675,7 +1715,7 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                         continue;
                 page = pfn_to_page(pfn);
                 set_page_links(page, zone, nid, pfn);
-               set_page_count(page, 0);
+               set_page_count(page, 1);
                 reset_page_mapcount(page);
                 SetPageReserved(page);
                 INIT_LIST_HEAD(&page->lru);
@@ -1722,14 +1762,13 @@ static int __devinit zone_batchsize(struct zone *zone)
  
         /*
          * The per-cpu-pages pools are set to around 1000th of the
-        * size of the zone.  But no more than 1/4 of a meg - there's
-        * no point in going beyond the size of L2 cache.
+        * size of the zone.  But no more than 1/2 of a meg.
          *
          * OK, so we don't know how big the cache is.  So guess.
          */
         batch = zone->present_pages / 1024;
-       if (batch * PAGE_SIZE > 256 * 1024)
-               batch = (256 * 1024) / PAGE_SIZE;
+       if (batch * PAGE_SIZE > 512 * 1024)
+               batch = (512 * 1024) / PAGE_SIZE;
         batch /= 4;             /* We effectively *= 4 below */
         if (batch < 1)
                 batch = 1;
@@ -1744,7 +1783,8 @@ static int __devinit zone_batchsize(struct zone *zone)
          * of pages of one half of the possible page colors
          * and the other with pages of the other colors.
          */
-       batch = (1 << fls(batch + batch/2)) - 1;
+       batch = (1 << (fls(batch + batch/2)-1)) - 1;
+
         return batch;
  }
  
@@ -1756,7 +1796,7 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
  
         pcp = &p->pcp[0];               /* hot */
         pcp->count = 0;
-       pcp->low = 2 * batch;
+       pcp->low = 0;
         pcp->high = 6 * batch;
         pcp->batch = max(1UL, 1 * batch);
         INIT_LIST_HEAD(&pcp->list);
@@ -1765,7 +1805,7 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
         pcp->count = 0;
         pcp->low = 0;
         pcp->high = 2 * batch;
-       pcp->batch = max(1UL, 1 * batch);
+       pcp->batch = max(1UL, batch/2);
         INIT_LIST_HEAD(&pcp->list);
  }
  
@@ -1845,11 +1885,10 @@ static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
                         if (process_zones(cpu))
                                 ret = NOTIFY_BAD;
                         break;
-#ifdef CONFIG_HOTPLUG_CPU
+               case CPU_UP_CANCELED:
                 case CPU_DEAD:
                         free_zone_pagesets(cpu);
                         break;
-#endif
                 default:
                         break;
         }
@@ -1859,7 +1898,7 @@ static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
  static struct notifier_block pageset_notifier =
         { &pageset_cpuup_callback, NULL, 0 };
  
-void __init setup_per_cpu_pageset()
+void __init setup_per_cpu_pageset(void)
  {
         int err;
  
@@ -1874,6 +1913,60 @@ void __init setup_per_cpu_pageset()
  
  #endif
  
+static __devinit
+void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+{
+       int i;
+       struct pglist_data *pgdat = zone->zone_pgdat;
+
+       /*
+        * The per-page waitqueue mechanism uses hashed waitqueues
+        * per zone.
+        */
+       zone->wait_table_size = wait_table_size(zone_size_pages);
+       zone->wait_table_bits = wait_table_bits(zone->wait_table_size);
+       zone->wait_table = (wait_queue_head_t *)
+               alloc_bootmem_node(pgdat, zone->wait_table_size
+                                       * sizeof(wait_queue_head_t));
+
+       for(i = 0; i < zone->wait_table_size; ++i)
+               init_waitqueue_head(zone->wait_table + i);
+}
+
+static __devinit void zone_pcp_init(struct zone *zone)
+{
+       int cpu;
+       unsigned long batch = zone_batchsize(zone);
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+#ifdef CONFIG_NUMA
+               /* Early boot. Slab allocator not functional yet */
+               zone->pageset[cpu] = &boot_pageset[cpu];
+               setup_pageset(&boot_pageset[cpu],0);
+#else
+               setup_pageset(zone_pcp(zone,cpu), batch);
+#endif
+       }
+       printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
+               zone->name, zone->present_pages, batch);
+}
+
+static __devinit void init_currently_empty_zone(struct zone *zone,
+               unsigned long zone_start_pfn, unsigned long size)
+{
+       struct pglist_data *pgdat = zone->zone_pgdat;
+
+       zone_wait_table_init(zone, size);
+       pgdat->nr_zones = zone_idx(zone) + 1;
+
+       zone->zone_mem_map = pfn_to_page(zone_start_pfn);
+       zone->zone_start_pfn = zone_start_pfn;
+
+       memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
+
+       zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+}
+
  /*
   * Set up the zone data structures:
   *   - mark all pages reserved
@@ -1883,10 +1976,11 @@ void __init setup_per_cpu_pageset()
  static void __init free_area_init_core(struct pglist_data *pgdat,
                 unsigned long *zones_size, unsigned long *zholes_size)
  {
-       unsigned long i, j;
-       int cpu, nid = pgdat->node_id;
+       unsigned long j;
+       int nid = pgdat->node_id;
         unsigned long zone_start_pfn = pgdat->node_start_pfn;
  
+       pgdat_resize_init(pgdat);
         pgdat->nr_zones = 0;
         init_waitqueue_head(&pgdat->kswapd_wait);
         pgdat->kswapd_max_order = 0;
@@ -1894,13 +1988,12 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
         for (j = 0; j < MAX_NR_ZONES; j++) {
                 struct zone *zone = pgdat->node_zones + j;
                 unsigned long size, realsize;
-               unsigned long batch;
  
                 realsize = size = zones_size[j];
                 if (zholes_size)
                         realsize -= zholes_size[j];
  
-               if (j == ZONE_DMA || j == ZONE_NORMAL)
+               if (j < ZONE_HIGHMEM)
                         nr_kernel_pages += realsize;
                 nr_all_pages += realsize;
  
@@ -1909,24 +2002,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
                 zone->name = zone_names[j];
                 spin_lock_init(&zone->lock);
                 spin_lock_init(&zone->lru_lock);
+               zone_seqlock_init(zone);
                 zone->zone_pgdat = pgdat;
                 zone->free_pages = 0;
  
                 zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
  
-               batch = zone_batchsize(zone);
-
-               for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
-                       /* Early boot. Slab allocator not functional yet */
-                       zone->pageset[cpu] = &boot_pageset[cpu];
-                       setup_pageset(&boot_pageset[cpu],0);
-#else
-                       setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
-               }
-               printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-                               zone_names[j], realsize, batch);
+               zone_pcp_init(zone);
                 INIT_LIST_HEAD(&zone->active_list);
                 INIT_LIST_HEAD(&zone->inactive_list);
                 zone->nr_scan_active = 0;
@@ -1937,32 +2019,9 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
                 if (!size)
                         continue;
  
-               /*
-                * The per-page waitqueue mechanism uses hashed waitqueues
-                * per zone.
-                */
-               zone->wait_table_size = wait_table_size(size);
-               zone->wait_table_bits =
-                       wait_table_bits(zone->wait_table_size);
-               zone->wait_table = (wait_queue_head_t *)
-                       alloc_bootmem_node(pgdat, zone->wait_table_size
-                                               * sizeof(wait_queue_head_t));
-
-               for(i = 0; i < zone->wait_table_size; ++i)
-                       init_waitqueue_head(zone->wait_table + i);
-
-               pgdat->nr_zones = j+1;
-
-               zone->zone_mem_map = pfn_to_page(zone_start_pfn);
-               zone->zone_start_pfn = zone_start_pfn;
-
-               memmap_init(size, nid, j, zone_start_pfn);
-
                 zonetable_add(zone, nid, j, zone_start_pfn, size);
-
+               init_currently_empty_zone(zone, zone_start_pfn, size);
                 zone_start_pfn += size;
-
-               zone_init_free_lists(pgdat, zone, zone->spanned_pages);
         }
  }
  
@@ -2362,7 +2421,7 @@ static void setup_per_zone_lowmem_reserve(void)
   *     that the pages_{min,low,high} values for each zone are set correctly 
   *     with respect to min_free_kbytes.
   */
-static void setup_per_zone_pages_min(void)
+void setup_per_zone_pages_min(void)
  {
         unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
         unsigned long lowmem_pages = 0;
@@ -2376,13 +2435,18 @@ static void setup_per_zone_pages_min(void)
         }
  
         for_each_zone(zone) {
+               unsigned long tmp;
                 spin_lock_irqsave(&zone->lru_lock, flags);
+               tmp = (pages_min * zone->present_pages) / lowmem_pages;
                 if (is_highmem(zone)) {
                         /*
-                        * Often, highmem doesn't need to reserve any pages.
-                        * But the pages_min/low/high values are also used for
-                        * batching up page reclaim activity so we need a
-                        * decent value here.
+                        * __GFP_HIGH and PF_MEMALLOC allocations usually don't
+                        * need highmem pages, so cap pages_min to a small
+                        * value here.
+                        *
+                        * The (pages_high-pages_low) and (pages_low-pages_min)
+                        * deltas control asynch page reclaim, and so should
+                        * not be capped for highmem.
                          */
                         int min_pages;
  
@@ -2393,19 +2457,15 @@ static void setup_per_zone_pages_min(void)
                                 min_pages = 128;
                         zone->pages_min = min_pages;
                 } else {
-                       /* if it's a lowmem zone, reserve a number of pages
+                       /*
+                        * If it's a lowmem zone, reserve a number of pages
                          * proportionate to the zone's size.
                          */
-                       zone->pages_min = (pages_min * zone->present_pages) /
-                                          lowmem_pages;
+                       zone->pages_min = tmp;
                 }
  
-               /*
-                * When interpreting these watermarks, just keep in mind that:
-                * zone->pages_min == (zone->pages_min * 4) / 4;
-                */
-               zone->pages_low   = (zone->pages_min * 5) / 4;
-               zone->pages_high  = (zone->pages_min * 6) / 4;
+               zone->pages_low   = zone->pages_min + tmp / 4;
+               zone->pages_high  = zone->pages_min + tmp / 2;
                 spin_unlock_irqrestore(&zone->lru_lock, flags);
         }
  }
diff --git a/linux-2.6-xen-sparse/net/core/dev.c b/linux-2.6-xen-sparse/net/core/dev.c

index 06d721eb6d573e5ea38497f10d2660d9836d58f1..e8811d2bdb73144348a90a7529088167855d4a0f 100644 (file)
--- a/linux-2.6-xen-sparse/net/core/dev.c
+++ b/linux-2.6-xen-sparse/net/core/dev.c
@@ -1114,6 +1114,19 @@ out:
         return ret;
  }
  
+/* Take action when hardware reception checksum errors are detected. */
+#ifdef CONFIG_BUG
+void netdev_rx_csum_fault(struct net_device *dev)
+{
+       if (net_ratelimit()) {
+               printk(KERN_ERR "%s: hw csum failure.\n", 
+                       dev ? dev->name : "<unknown>");
+               dump_stack();
+       }
+}
+EXPORT_SYMBOL(netdev_rx_csum_fault);
+#endif
+
  #ifdef CONFIG_HIGHMEM
  /* Actually, we should eliminate this check as soon as we know, that:
   * 1. IOMMU is present and allows to map all the memory.
@@ -2767,6 +2780,20 @@ int register_netdevice(struct net_device *dev)
                        dev->name);
                 dev->features &= ~NETIF_F_TSO;
         }
+       if (dev->features & NETIF_F_UFO) {
+               if (!(dev->features & NETIF_F_HW_CSUM)) {
+                       printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+                                       "NETIF_F_HW_CSUM feature.\n",
+                                                       dev->name);
+                       dev->features &= ~NETIF_F_UFO;
+               }
+               if (!(dev->features & NETIF_F_SG)) {
+                       printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+                                       "NETIF_F_SG feature.\n",
+                                       dev->name);
+                       dev->features &= ~NETIF_F_UFO;
+               }
+       }
  
         /*
          *      nil rebuild_header routine,
diff --git a/linux-2.6-xen-sparse/net/core/skbuff.c b/linux-2.6-xen-sparse/net/core/skbuff.c

index a1504f54b65b6a25380482d910adf0b6fe81ec05..7cd17430e292f6bb25b1d3689fa12834e7e5318b 100644 (file)
--- a/linux-2.6-xen-sparse/net/core/skbuff.c
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c
@@ -122,6 +122,8 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   *     __alloc_skb     -       allocate a network buffer
   *     @size: size to allocate
   *     @gfp_mask: allocation mask
+ *     @fclone: allocate from fclone cache instead of head cache
+ *             and allocate a cloned (child) skb
   *
   *     Allocate a new &sk_buff. The returned buffer has no headroom and a
   *     tail room of size bytes. The object has a reference count of one.
@@ -175,6 +177,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         skb_shinfo(skb)->tso_size = 0;
         skb_shinfo(skb)->tso_segs = 0;
         skb_shinfo(skb)->frag_list = NULL;
+       skb_shinfo(skb)->ufo_size = 0;
+       skb_shinfo(skb)->ip6_frag_id = 0;
  out:
         return skb;
  nodata:
@@ -247,6 +251,8 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
         skb_shinfo(skb)->tso_size = 0;
         skb_shinfo(skb)->tso_segs = 0;
         skb_shinfo(skb)->frag_list = NULL;
+       skb_shinfo(skb)->ufo_size = 0;
+       skb_shinfo(skb)->ip6_frag_id = 0;
  out:
         return skb;
  nodata:
@@ -354,6 +360,9 @@ void __kfree_skb(struct sk_buff *skb)
         }
  #ifdef CONFIG_NETFILTER
         nf_conntrack_put(skb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(skb->nfct_reasm);
+#endif
  #ifdef CONFIG_BRIDGE_NETFILTER
         nf_bridge_put(skb->nf_bridge);
  #endif
@@ -436,9 +445,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
         C(nfct);
         nf_conntrack_get(skb->nfct);
         C(nfctinfo);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
  #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
         C(ipvs_property);
  #endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
  #ifdef CONFIG_BRIDGE_NETFILTER
         C(nf_bridge);
         nf_bridge_get(skb->nf_bridge);
@@ -496,6 +513,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
         new->nfct       = old->nfct;
         nf_conntrack_get(old->nfct);
         new->nfctinfo   = old->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       new->nfct_reasm = old->nfct_reasm;
+       nf_conntrack_get_reasm(old->nfct_reasm);
+#endif
  #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
         new->ipvs_property = old->ipvs_property;
  #endif
@@ -1720,6 +1741,78 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
         return textsearch_find(config, state);
  }
  
+/**
+ * skb_append_datato_frags - append user data to an skb
+ * @sk: sock structure
+ * @skb: skb structure to be appended with user data
+ * @getfrag: callback function used to fetch the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure appends the user data to the fragment part
+ * of the skb. If any page allocation fails, it returns -ENOMEM.
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+                       int (*getfrag)(void *from, char *to, int offset,
+                                       int len, int odd, struct sk_buff *skb),
+                       void *from, int length)
+{
+       int frg_cnt = 0;
+       skb_frag_t *frag = NULL;
+       struct page *page = NULL;
+       int copy, left;
+       int offset = 0;
+       int ret;
+
+       do {
+               /* Return error if we don't have space for new frag */
+               frg_cnt = skb_shinfo(skb)->nr_frags;
+               if (frg_cnt >= MAX_SKB_FRAGS)
+                       return -EFAULT;
+
+               /* allocate a new page for next frag */
+               page = alloc_pages(sk->sk_allocation, 0);
+
+               /* If alloc_pages() fails, just return failure; the caller will
+                * free previously allocated pages by doing kfree_skb()
+                */
+               if (page == NULL)
+                       return -ENOMEM;
+
+               /* initialize the next frag */
+               sk->sk_sndmsg_page = page;
+               sk->sk_sndmsg_off = 0;
+               skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+               skb->truesize += PAGE_SIZE;
+               atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+               /* get the new initialized frag */
+               frg_cnt = skb_shinfo(skb)->nr_frags;
+               frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+               /* copy the user data to page */
+               left = PAGE_SIZE - frag->page_offset;
+               copy = (length > left)? left : length;
+
+               ret = getfrag(from, (page_address(frag->page) +
+                           frag->page_offset + frag->size),
+                           offset, copy, 0, skb);
+               if (ret < 0)
+                       return -EFAULT;
+
+               /* copy was successful so update the size parameters */
+               sk->sk_sndmsg_off += copy;
+               frag->size += copy;
+               skb->len += copy;
+               skb->data_len += copy;
+               offset += copy;
+               length -= copy;
+
+       } while (length > 0);
+
+       return 0;
+}
+
  void __init skb_init(void)
  {
         skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1771,3 +1864,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  EXPORT_SYMBOL(skb_seq_read);
  EXPORT_SYMBOL(skb_abort_seq_read);
  EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_append_datato_frags);
author	cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
	Wed, 1 Feb 2006 18:00:19 +0000 (18:00 +0000)
committer	cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
	Wed, 1 Feb 2006 18:00:19 +0000 (18:00 +0000)
buildconfigs/linux-defconfig_xen0_x86_32		patch \| blob \| history
buildconfigs/linux-defconfig_xen0_x86_64		patch \| blob \| history
buildconfigs/linux-defconfig_xenU_x86_32		patch \| blob \| history
buildconfigs/linux-defconfig_xenU_x86_64		patch \| blob \| history
buildconfigs/linux-defconfig_xen_x86_32		patch \| blob \| history
buildconfigs/linux-defconfig_xen_x86_64		patch \| blob \| history
buildconfigs/mk.linux-2.6-xen		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/Kconfig		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/apm.c	[deleted file]	patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/ldt-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/traps.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/pci/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/power/Makefile	[new file with mode: 0644]	patch \| blob
linux-2.6-xen-sparse/arch/um/kernel/physmem.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/Kconfig		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/pci-nommu-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/pci/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/drivers/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/drivers/acpi/Kconfig		patch \| blob \| history
linux-2.6-xen-sparse/drivers/char/mem.c		patch \| blob \| history
linux-2.6-xen-sparse/drivers/char/tpm/Kconfig		patch \| blob \| history
linux-2.6-xen-sparse/drivers/char/tpm/tpm.c		patch \| blob \| history
linux-2.6-xen-sparse/drivers/char/tpm/tpm.h		patch \| blob \| history
linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c		patch \| blob \| history
linux-2.6-xen-sparse/drivers/char/tty_io.c		patch \| blob \| history
linux-2.6-xen-sparse/drivers/firmware/Kconfig		patch \| blob \| history
linux-2.6-xen-sparse/drivers/serial/Kconfig		patch \| blob \| history
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c		patch \| blob \| history
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c		patch \| blob \| history
linux-2.6-xen-sparse/fs/Kconfig		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/atomic.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/param.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/rwsem.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/system.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-um/page.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hw_irq.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/param.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/io_ports.h	[deleted file]	patch \| blob \| history
linux-2.6-xen-sparse/include/linux/gfp.h		patch \| blob \| history
linux-2.6-xen-sparse/include/linux/irq.h		patch \| blob \| history
linux-2.6-xen-sparse/include/linux/mm.h		patch \| blob \| history
linux-2.6-xen-sparse/include/linux/skbuff.h		patch \| blob \| history
linux-2.6-xen-sparse/kernel/irq/manage.c		patch \| blob \| history
linux-2.6-xen-sparse/lib/Kconfig.debug		patch \| blob \| history
linux-2.6-xen-sparse/lib/Makefile	[new file with mode: 0644]	patch \| blob
linux-2.6-xen-sparse/mm/Kconfig	[new file with mode: 0644]	patch \| blob
linux-2.6-xen-sparse/mm/highmem.c		patch \| blob \| history
linux-2.6-xen-sparse/mm/memory.c		patch \| blob \| history
linux-2.6-xen-sparse/mm/mmap.c		patch \| blob \| history
linux-2.6-xen-sparse/mm/page_alloc.c		patch \| blob \| history
linux-2.6-xen-sparse/net/core/dev.c		patch \| blob \| history
linux-2.6-xen-sparse/net/core/skbuff.c		patch \| blob \| history